def test_extra_columns(self):
    """Test with an extra column in the input dataframe.

    In this case the extra column gets removed when the table is pivoted.
    """
    # Set up expected dataframe.
    expected_data = [[280., np.nan], [np.nan, 281.]]
    expected_df = pd.DataFrame(expected_data, columns=["T+000", "T+001"])
    expected_df.columns.name = "forecast_period"
    # Set up cube with multiple lead times in.
    second_cube = set_up_spot_cube(
        281,
        number_of_sites=1,
        validity_time=1487311200 + 3600,
        forecast_period=1,
    )
    merged_cube = CubeList([self.cube, second_cube])
    merged_cube = merged_cube.concatenate()
    # Set up input dataframe
    data = [[1487311200, 3001, 280.],
            [1487311200 + 3600, 3002, 281.]]
    columns = ["time", "wmo_site", "values"]
    input_df = pd.DataFrame(data, columns=columns)
    result = self.plugin.pivot_table(merged_cube[0], input_df)
    assert_frame_equal(expected_df, result)
def interpolate_alongacross(Processes, Track, cell, dx=500, width=10000,
                            z_coord='model_level_number',
                            height_levels=np.arange(2, 20000, 2000)):
    '''Interpolate process-rate cubes onto vertical cross sections along
    and across the direction of motion of a single tracked cell.

    For each timestep of the cell's track, target grids oriented along
    and across the cell's motion are built around the cell position, the
    input cubes are interpolated onto them, and the per-timestep results
    are concatenated over time.
    '''
    from iris import Constraint
    from iris.cube import CubeList

    # Restrict the track to the requested cell and read off its positions.
    Track_cell = Track[Track['cell'] == cell]
    time = Track_cell['time'].values
    x = Track_cell['projection_x_coordinate'].values
    y = Track_cell['projection_y_coordinate'].values
    # Direction of motion at each timestep, used to orient the grids.
    alpha = calculate_alpha_all(x, y)

    cubelist_Processes_along = CubeList()
    cubelist_Processes_across = CubeList()
    for i, time_i in enumerate(time):
        logging.debug(time_i)
        constraint_time = Constraint(time=time_i)
        grid_along, grid_across = make_cubes_alongacross(
            x=x[i], y=y[i], alpha=alpha[i],
            cube_sample=Processes.extract(constraint_time)[0],
            dx=dx, width=width, z_coord=z_coord,
            height_levels=height_levels)
        Processes_along_i, Processes_across_i = interpolate_to_cubes(
            Processes.extract(constraint_time), grid_along, grid_across,
            z_coord=z_coord)
        cubelist_Processes_along.extend(Processes_along_i)
        cubelist_Processes_across.extend(Processes_across_i)

    # Join the per-timestep cross sections into cubes with a time axis.
    Processes_along = cubelist_Processes_along.concatenate()
    Processes_across = cubelist_Processes_across.concatenate()
    return Processes_along, Processes_across
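# A minimal usage sketch for interpolate_alongacross. The file names and
# the cell id are assumptions for illustration, not part of this module:
# Track is expected to be a pandas DataFrame of tracked cells with
# 'cell', 'time' and projection-coordinate columns, and Processes an
# iris CubeList of process-rate cubes.
def example_interpolate_alongacross():
    import iris
    import pandas as pd

    process_cubes = iris.load('process_rates.nc')  # assumed input file
    track_df = pd.read_hdf('tracks.h5', 'table')   # assumed track output
    along, across = interpolate_alongacross(
        process_cubes, track_df, cell=42, dx=500, width=10000,
        z_coord='model_level_number')
    return along, across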
def setUp(self):
    """Set up the plugin and dataframe needed for these tests"""
    self.cube = set_up_spot_cube(280, number_of_sites=1)
    second_cube = self.cube.copy()
    second_cube.coord("percentile").points = np.array([60.0])
    cubelist = CubeList([self.cube, second_cube])
    self.cubes = cubelist.concatenate()
    self.plugin = SpotDatabase("csv", "output", "improver", "time",
                               "IMPRO", 0)
def concatenate_nc_files(cubes: CubeList, filename: str) -> str:
    """Concatenate a CubeList into a single cube, save it as netCDF and
    return the path of the output file."""
    try:
        logger.info('Concatenating files...')
        new_cube = cubes.concatenate()
        logger.info(new_cube[0])
        output_nc_file = create_output_file(filename)
        logger.info(f'Saving {output_nc_file}...')
        iris.save(new_cube[0], output_nc_file)
    except Exception as e:
        raise MergeError(e) from e
    return output_nc_file
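# A minimal usage sketch for concatenate_nc_files. The file names are
# assumptions for illustration; create_output_file() and MergeError are
# assumed to be defined elsewhere in this module.
def example_concatenate_nc_files():
    import iris

    cubes = iris.load(['forecast_000.nc', 'forecast_001.nc'])
    return concatenate_nc_files(cubes, 'forecast_merged')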
def test_exception_if_pivot_dim_set(self):
    """Test it raises an exception if pivot_dim is set but the input cube
    has a further dimension with more than one point."""
    plugin = SpotDatabase("csv", "output", "improver", "time", "IMPRO", 0,
                          pivot_dim="percentile")
    cube = set_up_spot_cube(280, number_of_sites=3)
    second_cube = cube.copy()
    second_cube.coord("percentile").points = np.array([60.0])
    cubelist = CubeList([cube, second_cube])
    cubes = cubelist.concatenate()
    message = ("Dimensions that are not described by the pivot_dim or "
               "coord_to_slice_over must only have one point in. "
               "Dimension '2' has length '3' and is associated with the "
               "'index' coordinate.")
    with self.assertRaisesRegex(ValueError, message):
        plugin.check_input_dimensions(cubes[0])
def test_multiple_times_cube(self):
    """Test using one input cube, with one site and multiple times."""
    # Set up expected dataframe.
    expected_data = [[280., np.nan], [np.nan, 281.]]
    expected_df = pd.DataFrame(expected_data, columns=["T+000", "T+001"])
    expected_df.columns.name = "forecast_period"
    # Set up cube with multiple lead times in.
    second_cube = set_up_spot_cube(
        281,
        number_of_sites=1,
        validity_time=1487311200 + 3600,
        forecast_period=1,
    )
    merged_cube = CubeList([self.cube, second_cube])
    merged_cube = merged_cube.concatenate()
    # Set up input dataframe
    data = [[1487311200, 280.],
            [1487311200 + 3600, 281.]]
    columns = ["time", "values"]
    input_df = pd.DataFrame(data, columns=columns)
    result = self.plugin.pivot_table(merged_cube[0], input_df)
    assert_frame_equal(expected_df, result)
def interpolate_alongacross_mean(Processes, Track, cell, dx, width,
                                 z_coord='model_level_number',
                                 height_level_borders=np.arange(0, 20000,
                                                                2000)):
    '''As interpolate_alongacross, but interpolates onto layer-mean cross
    sections between the given height level borders. A bounding box
    around the cell position is extracted first to limit the amount of
    data handled at each timestep.
    '''
    from iris import Constraint
    from iris.cube import CubeList

    Track_cell = Track[Track['cell'] == cell]
    time = Track_cell['time'].values
    x = Track_cell['projection_x_coordinate'].values
    y = Track_cell['projection_y_coordinate'].values
    alpha = calculate_alpha_all(x, y)

    cubelist_Processes_along = CubeList()
    cubelist_Processes_across = CubeList()
    for i, time_i in enumerate(time):
        logging.debug(time_i)
        constraint_time = Constraint(time=time_i)
        # Bounding box of (width + n_add_width) grid spacings around the
        # cell position; the extra points keep the interpolation targets
        # inside the extracted data.
        n_add_width = 2
        box_slice = [[x[i] - (width + n_add_width) * dx,
                      x[i] + (width + n_add_width) * dx],
                     [y[i] - (width + n_add_width) * dx,
                      y[i] + (width + n_add_width) * dx]]
        x_min, x_max = box_slice[0]
        y_min, y_max = box_slice[1]
        constraint_x = Constraint(
            projection_x_coordinate=lambda c: int(x_min) < c < int(x_max))
        constraint_y = Constraint(
            projection_y_coordinate=lambda c: int(y_min) < c < int(y_max))
        constraint = constraint_time & constraint_x & constraint_y
        grid_along, grid_across = make_cubes_alongacross_mean(
            x=x[i], y=y[i], alpha=alpha[i],
            cube_sample=Processes.extract(constraint)[0],
            dx=dx, width=width,
            height_level_borders=height_level_borders)
        Processes_along_i, Processes_across_i = interpolate_to_cubes_mean(
            Processes.extract(constraint), grid_along, grid_across,
            height_level_borders=height_level_borders)
        cubelist_Processes_along.extend(Processes_along_i)
        cubelist_Processes_across.extend(Processes_across_i)

    Processes_along = cubelist_Processes_along.concatenate()
    Processes_across = cubelist_Processes_across.concatenate()
    return Processes_along, Processes_across
def process_diagnostic(diagnostic, neighbours, sites, forecast_times,
                       data_path, ancillary_data, output_path=None):
    """
    Extract data and write output for a given diagnostic.

    Args:
    -----
    diagnostic : dict
        Dictionary describing the diagnostic to be processed, including
        the 'filepath', 'diagnostic_name', 'neighbour_finding',
        'interpolation_method' and 'extrema' entries used below.

    neighbours : numpy.array
        Array of neighbouring grid points that are associated with sites
        in the SortedDictionary of sites.

    sites : dict
        A dictionary containing the properties of spotdata sites.

    forecast_times : list[datetime.datetime objects]
        A list of datetimes representing forecast times for which data is
        required.

    data_path : string
        Path to diagnostic data files.

    ancillary_data : dict
        A dictionary containing additional model data that is needed.
        e.g. {'orography': <cube of orography>}

    output_path : str
        Path to which output file containing processed diagnostic should
        be written.

    Returns:
    --------
    None

    Raises:
    -------
    IOError : If no relevant data cubes are found at given path.
    Exception : If no spotdata is returned for any forecast time.

    """
    # Search directory structure for all files relevant to current
    # diagnostic.
    files_to_read = [
        os.path.join(dirpath, filename)
        for dirpath, _, files in os.walk(data_path)
        for filename in files if diagnostic['filepath'] in filename]
    if not files_to_read:
        raise IOError(
            'No relevant data files found in {}.'.format(data_path))

    # Load cubes into an iris.cube.CubeList.
    cubes = Load('multi_file').process(files_to_read,
                                       diagnostic['diagnostic_name'])

    # Grab the relevant set of grid point neighbours for the neighbour
    # finding method being used by this diagnostic.
    neighbour_hash = construct_neighbour_hash(
        diagnostic['neighbour_finding'])
    neighbour_list = neighbours[neighbour_hash]

    # Check if additional diagnostics are needed (e.g. multi-level data).
    # If required, load into the additional_diagnostics dictionary.
    additional_diagnostics = get_method_prerequisites(
        diagnostic['interpolation_method'], data_path)

    # Create empty iris.cube.CubeList to hold extracted data cubes.
    resulting_cubes = CubeList()

    # Get optional kwargs that may be set to override defaults.
    optionals = ['upper_level', 'lower_level', 'no_neighbours',
                 'dz_tolerance', 'dthetadz_threshold',
                 'dz_max_adjustment']
    kwargs = {}
    if ancillary_data.get('config_constants') is not None:
        for optional in optionals:
            constant = ancillary_data.get('config_constants').get(
                optional)
            if constant is not None:
                kwargs[optional] = constant

    # Loop over forecast times.
    for a_time in forecast_times:
        # Extract Cube from CubeList at current time.
        time_extract = datetime_constraint(a_time)
        cube = extract_cube_at_time(cubes, a_time, time_extract)
        if cube is None:
            # If no cube is available at given time, try the next time.
            continue

        ad = {}
        if additional_diagnostics is not None:
            # Extract additional diagnostics at current time.
            ad = extract_ad_at_time(additional_diagnostics, a_time,
                                    time_extract)

        args = (cube, sites, neighbour_list, ancillary_data, ad)

        # Extract diagnostic data using defined method.
        resulting_cubes.append(
            ExtractData(diagnostic['interpolation_method']).process(
                *args, **kwargs))

    # Concatenate CubeList into Cube, creating a time DimCoord, and write
    # out.
    if resulting_cubes:
        cube_out, = resulting_cubes.concatenate()
        WriteOutput('as_netcdf', dir_path=output_path).process(cube_out)
    else:
        raise Exception('No data available at given forecast times.')

    # If set in the configuration, extract the diagnostic maxima and
    # minima values.
    if diagnostic['extrema']:
        extrema_cubes = ExtractExtrema(24, start_hour=9).process(cube_out)
        extrema_cubes = extrema_cubes.merge()
        for extrema_cube in extrema_cubes:
            WriteOutput('as_netcdf',
                        dir_path=output_path).process(extrema_cube)
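# A minimal usage sketch for process_diagnostic. All values here are
# assumptions for illustration; 'neighbours' and 'sites' are assumed to
# come from the site and neighbour-finding setup steps, and the
# dictionary keys are the ones read by the function above.
def example_process_diagnostic(neighbours, sites):
    from datetime import datetime, timedelta

    diagnostic = {'filepath': 'temperature_at_screen_level',
                  'diagnostic_name': 'air_temperature',
                  'neighbour_finding': 'fast_nearest_neighbour',
                  'interpolation_method': 'use_nearest',
                  'extrema': True}
    forecast_times = [datetime(2017, 2, 17) + timedelta(hours=hour)
                      for hour in range(0, 24, 3)]
    process_diagnostic(diagnostic, neighbours, sites, forecast_times,
                       data_path='/path/to/data', ancillary_data={},
                       output_path='/path/to/output')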