Example #1
    def test_extra_columns(self):
        """Test with an extra column in the input dataframe.
           In this case the extra column gets removed when the table
           is pivoted.
        """
        # Set up expected dataframe.
        expected_data = [[280., np.nan], [np.nan, 281.]]
        expected_df = pd.DataFrame(expected_data, columns=["T+000", "T+001"])
        expected_df.columns.name = "forecast_period"

        # Set up a second cube with a different lead time.
        second_cube = set_up_spot_cube(
            281,
            number_of_sites=1,
            validity_time=1487311200 + 3600,
            forecast_period=1,
        )

        merged_cube = CubeList([self.cube, second_cube])
        merged_cube = merged_cube.concatenate()
        # Set up input dataframe
        data = [[1487311200, 3001, 280.], [1487311200 + 3600, 3002, 281.]]
        columns = ["time", "wmo_site", "values"]
        input_df = pd.DataFrame(data, columns=columns)
        result = self.plugin.pivot_table(merged_cube[0], input_df)
        assert_frame_equal(expected_df, result)
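The pivoted layout the test expects can be reproduced with a plain pandas pivot. A minimal sketch, assuming the forecast_period labels are supplied directly rather than derived from the cube as the plugin's pivot_table does:

import pandas as pd

df = pd.DataFrame({
    "forecast_period": ["T+000", "T+001"],
    "values": [280.0, 281.0],
})
pivoted = df.pivot(columns="forecast_period", values="values")
# pivoted matches the expected frame above: 280.0 under T+000 in the first
# row, 281.0 under T+001 in the second, NaN elsewhere, and the columns are
# named "forecast_period".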
Example #2
def interpolate_alongacross(Processes,
                            Track,
                            cell,
                            dx=500,
                            width=10000,
                            z_coord='model_level_number',
                            height_levels=np.arange(2, 20000, 2000)):
    '''
    Interpolate the Processes fields for a single tracked cell onto
    along-track and across-track cross-section grids at each time step,
    and concatenate the results over time.
    '''
    from iris import Constraint
    from iris.cube import CubeList
    Track_cell = Track[Track['cell'] == cell]
    time = Track_cell['time'].values
    x = Track_cell['projection_x_coordinate'].values
    y = Track_cell['projection_y_coordinate'].values
    alpha = calculate_alpha_all(x, y)

    cubelist_Processes_along = CubeList()
    cubelist_Processes_across = CubeList()

    for i, time_i in enumerate(time):
        logging.debug(time_i)
        constraint_time = Constraint(time=time_i)
        grid_along, grid_across = make_cubes_alongacross(
            x=x[i],
            y=y[i],
            alpha=alpha[i],
            cube_sample=Processes.extract(constraint_time)[0],
            dx=dx,
            width=width,
            z_coord=z_coord,
            height_levels=height_levels)

        Processes_along_i, Processes_across_i = interpolate_to_cubes(
            Processes.extract(constraint_time),
            grid_along,
            grid_across,
            z_coord=z_coord)

        cubelist_Processes_along.extend(Processes_along_i)
        cubelist_Processes_across.extend(Processes_across_i)
    Processes_along = cubelist_Processes_along.concatenate()
    Processes_across = cubelist_Processes_across.concatenate()

    return Processes_along, Processes_across
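The extend-and-concatenate pattern used above joins the per-time cubes along their existing length-one time dimension. A minimal self-contained sketch with synthetic cubes (names and coordinates chosen purely for illustration):

import numpy as np
from iris.coords import DimCoord
from iris.cube import Cube, CubeList


def _time_slice(hour):
    # One cube per time step, carrying a length-one time dimension.
    time = DimCoord([hour], standard_name="time",
                    units="hours since 1970-01-01 00:00:00")
    x = DimCoord(np.arange(3.0), long_name="x")
    return Cube(np.full((1, 3), float(hour)), long_name="field",
                dim_coords_and_dims=[(time, 0), (x, 1)])


cubelist = CubeList()
for hour in (0, 1, 2):
    cubelist.append(_time_slice(hour))
concatenated, = cubelist.concatenate()  # one cube of shape (3, 3)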
Example #3
    def setUp(self):
        """Set up the plugin and dataframe needed for these tests"""
        self.cube = set_up_spot_cube(280, number_of_sites=1)
        second_cube = self.cube.copy()
        second_cube.coord("percentile").points = np.array([60.0])
        cubelist = CubeList([self.cube, second_cube])
        self.cubes = cubelist.concatenate()
        self.plugin = SpotDatabase("csv", "output", "improver", "time",
                                   "IMPRO", 0)
Example #4
def concatenate_nc_files(cubes: CubeList, filename: str) -> str:
    """Concatenate the input cubes into a single cube and save it as a
    NetCDF file, returning the path of the file written."""
    try:
        logger.info('Concatenating files...')
        new_cube = cubes.concatenate()
        logger.info(new_cube[0])
        output_nc_file = create_output_file(filename)
        logger.info(f'Saving {output_nc_file}...')
        iris.save(new_cube[0], output_nc_file)
    except Exception as e:
        raise MergeError(e)
    return output_nc_file
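A hypothetical call, assuming the surrounding module defines logger, create_output_file and MergeError, and that the input files share compatible metadata:

import iris

cubes = iris.load("/path/to/forecast_t*.nc", "air_temperature")
combined_path = concatenate_nc_files(cubes, "forecast_combined")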
Example #5
    def test_exception_if_pivot_dim_set(self):
        """Test it raises an exception if the input cube has an extra
           dimension that is not described by pivot_dim or
           coord_to_slice_over."""
        plugin = SpotDatabase("csv",
                              "output",
                              "improver",
                              "time",
                              "IMPRO",
                              0,
                              pivot_dim="percentile")
        cube = set_up_spot_cube(280, number_of_sites=3)
        second_cube = cube.copy()
        second_cube.coord("percentile").points = np.array([60.0])
        cubelist = CubeList([cube, second_cube])
        cubes = cubelist.concatenate()
        message = "Dimensions that are not described by the pivot_dim or "\
                  "coord_to_slice_over must only have one point in. "\
                  "Dimension '2' has length '3' and is associated with the "\
                  "'index' coordinate."
        with self.assertRaisesRegex(ValueError, message):
            plugin.check_input_dimensions(cubes[0])
Example #6
    def test_multiple_times_cube(self):
        """Test using one input cube, with one site and multiple times."""
        # Set up expected dataframe.
        expected_data = [[280., np.nan], [np.nan, 281.]]
        expected_df = pd.DataFrame(expected_data, columns=["T+000", "T+001"])
        expected_df.columns.name = "forecast_period"

        # Set up a second cube with a different lead time.
        second_cube = set_up_spot_cube(
            281,
            number_of_sites=1,
            validity_time=1487311200 + 3600,
            forecast_period=1,
        )

        merged_cube = CubeList([self.cube, second_cube])
        merged_cube = merged_cube.concatenate()
        # Set up input dataframe
        data = [[1487311200, 280.], [1487311200 + 3600, 281.]]
        columns = ["time", "values"]
        input_df = pd.DataFrame(data, columns=columns)
        result = self.plugin.pivot_table(merged_cube[0], input_df)
        assert_frame_equal(expected_df, result)
Example #7
def interpolate_alongacross_mean(Processes,
                                 Track,
                                 cell,
                                 dx,
                                 width,
                                 z_coord='model_level_number',
                                 height_level_borders=np.arange(
                                     0, 20000, 2000)):
    '''
    Interpolate the Processes fields for a single tracked cell onto
    along-track and across-track cross sections for the height layers
    defined by height_level_borders, and concatenate the results over time.
    '''
    from iris import Constraint
    from iris.cube import CubeList
    Track_cell = Track[Track['cell'] == cell]
    time = Track_cell['time'].values
    x = Track_cell['projection_x_coordinate'].values
    y = Track_cell['projection_y_coordinate'].values
    alpha = calculate_alpha_all(x, y)

    cubelist_Processes_along = CubeList()
    cubelist_Processes_across = CubeList()

    for i, time_i in enumerate(time):
        logging.debug(time_i)

        constraint_time = Constraint(time=time_i)

        # Bounding box around the cell position at this time, padded by
        # n_add_width grid lengths on each side.
        n_add_width = 2
        x_min = x[i] - (width + n_add_width) * dx
        x_max = x[i] + (width + n_add_width) * dx
        y_min = y[i] - (width + n_add_width) * dx
        y_max = y[i] + (width + n_add_width) * dx

        constraint_x = Constraint(
            projection_x_coordinate=lambda c: int(x_min) < c < int(x_max))
        constraint_y = Constraint(
            projection_y_coordinate=lambda c: int(y_min) < c < int(y_max))

        constraint = constraint_time & constraint_x & constraint_y

        grid_along, grid_across = make_cubes_alongacross_mean(
            x=x[i],
            y=y[i],
            alpha=alpha[i],
            cube_sample=Processes.extract(constraint)[0],
            dx=dx,
            width=width,
            height_level_borders=height_level_borders)

        Processes_along_i, Processes_across_i = interpolate_to_cubes_mean(
            Processes.extract(constraint),
            grid_along,
            grid_across,
            height_level_borders=height_level_borders)

        cubelist_Processes_along.extend(Processes_along_i)
        cubelist_Processes_across.extend(Processes_across_i)
    Processes_along = cubelist_Processes_along.concatenate()
    Processes_across = cubelist_Processes_across.concatenate()

    return Processes_along, Processes_across
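One caveat with the lambda-based constraints above: the lambdas read x_min, x_max, y_min and y_max when the extraction is evaluated, not when the Constraint is built, so they have to be used within the same loop iteration (as they are here). A small illustration of that late binding:

checks = []
for limit in (1, 2, 3):
    checks.append(lambda value: value < limit)
# Every closure sees the final value of `limit` (3), not the value it was
# created with:
print([check(2) for check in checks])  # [True, True, True]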
Example #8
def process_diagnostic(diagnostic,
                       neighbours,
                       sites,
                       forecast_times,
                       data_path,
                       ancillary_data,
                       output_path=None):
    """
    Extract data and write output for a given diagnostic.

    Args:
    -----
    diagnostic : string
        String naming the diagnostic to be processed.

    neighbours : numpy.array
        Array of neighbouring grid points that are associated with sites
        in the SortedDictionary of sites.

    sites : dict
        A dictionary containing the properties of spotdata sites.

    forecast_times : list[datetime.datetime objects]
        A list of datetimes representing forecast times for which data is
        required.

    data_path : string
        Path to diagnostic data files.

    ancillary_data : dict
        A dictionary containing additional model data that is needed.
        e.g. {'orography': <cube of orography>}

    output_path : str
        Path to which output file containing processed diagnostic should be
        written.

    Returns:
    --------
    None

    Raises:
    -------
    IOError : If no relevant data files are found at the given path.
    Exception : If no data is available at the given forecast times.

    """
    # Search directory structure for all files relevant to current diagnostic.
    files_to_read = [
        os.path.join(dirpath, filename)
        for dirpath, _, files in os.walk(data_path) for filename in files
        if diagnostic['filepath'] in filename
    ]
    if not files_to_read:
        raise IOError('No relevant data files found in {}.'.format(data_path))

    # Load cubes into an iris.cube.CubeList.
    cubes = Load('multi_file').process(files_to_read,
                                       diagnostic['diagnostic_name'])

    # Grab the relevant set of grid point neighbours for the neighbour finding
    # method being used by this diagnostic.
    neighbour_hash = construct_neighbour_hash(diagnostic['neighbour_finding'])
    neighbour_list = neighbours[neighbour_hash]

    # Check if additional diagnostics are needed (e.g. multi-level data).
    # If required, load into the additional_diagnostics dictionary.
    additional_diagnostics = get_method_prerequisites(
        diagnostic['interpolation_method'], data_path)

    # Create empty iris.cube.CubeList to hold extracted data cubes.
    resulting_cubes = CubeList()

    # Get optional kwargs that may be set to override defaults.
    optionals = [
        'upper_level', 'lower_level', 'no_neighbours', 'dz_tolerance',
        'dthetadz_threshold', 'dz_max_adjustment'
    ]
    kwargs = {}
    if ancillary_data.get('config_constants') is not None:
        for optional in optionals:
            constant = ancillary_data.get('config_constants').get(optional)
            if constant is not None:
                kwargs[optional] = constant

    # Loop over forecast times.
    for a_time in forecast_times:
        # Extract Cube from CubeList at current time.
        time_extract = datetime_constraint(a_time)
        cube = extract_cube_at_time(cubes, a_time, time_extract)
        if cube is None:
            # If no cube is available at given time, try the next time.
            continue

        ad = {}
        if additional_diagnostics is not None:
            # Extract additional diagnostics at the current time.
            ad = extract_ad_at_time(additional_diagnostics, a_time,
                                    time_extract)

        args = (cube, sites, neighbour_list, ancillary_data, ad)

        # Extract diagnostic data using defined method.
        resulting_cubes.append(
            ExtractData(diagnostic['interpolation_method']).process(
                *args, **kwargs))

    # Concatenate CubeList into Cube, creating a time DimCoord, and write out.
    if resulting_cubes:
        cube_out, = resulting_cubes.concatenate()
        WriteOutput('as_netcdf', dir_path=output_path).process(cube_out)
    else:
        raise Exception('No data available at given forecast times.')

    # If set in the configuration, extract the diagnostic maxima and minima
    # values.
    if diagnostic['extrema']:
        extrema_cubes = ExtractExtrema(24, start_hour=9).process(cube_out)
        extrema_cubes = extrema_cubes.merge()
        for extrema_cube in extrema_cubes:
            WriteOutput('as_netcdf',
                        dir_path=output_path).process(extrema_cube)
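A hypothetical ancillary_data structure showing where the optional constants read above would come from; the keys follow the optionals list and the values are made up for illustration:

ancillary_data = {
    "orography": None,  # would normally hold a cube of model orography
    "config_constants": {
        "no_neighbours": 9,
        "dz_tolerance": 2.0,
    },
}
# With this input, kwargs becomes {"no_neighbours": 9, "dz_tolerance": 2.0}
# and the remaining optional values are left to their defaults.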