def main(argv=None): """Load in arguments and ensure they are set correctly. Then load in the data to blend and calculate default weights using the method chosen before carrying out the blending.""" parser = ArgParser( description='Calculate the default weights to apply in weighted ' 'blending plugins using the ChooseDefaultWeightsLinear or ' 'ChooseDefaultWeightsNonLinear plugins. Then apply these ' 'weights to the dataset using the BasicWeightedAverage plugin.' ' Required for ChooseDefaultWeightsLinear: y0val and ynval.' ' Required for ChooseDefaultWeightsNonLinear: cval.' ' Required for ChooseWeightsLinear with dict: wts_dict.') parser.add_argument('--wts_calc_method', metavar='WEIGHTS_CALCULATION_METHOD', choices=['linear', 'nonlinear', 'dict'], default='linear', help='Method to use to calculate ' 'weights used in blending. "linear" (default): ' 'calculate linearly varying blending weights. ' '"nonlinear": calculate blending weights that decrease' ' exponentially with increasing blending coordinate. ' '"dict": calculate weights using a dictionary passed ' 'in as a command line argument.') parser.add_argument('coordinate', type=str, metavar='COORDINATE_TO_AVERAGE_OVER', help='The coordinate over which the blending ' 'will be applied.') parser.add_argument('--coordinate_unit', metavar='UNIT_STRING', default='hours since 1970-01-01 00:00:00', help='Units for blending coordinate. Default= ' 'hours since 1970-01-01 00:00:00') parser.add_argument('--calendar', metavar='CALENDAR', help='Calendar for time coordinate. Default=gregorian') parser.add_argument('--cycletime', metavar='CYCLETIME', type=str, help='The forecast reference time to be used after ' 'blending has been applied, in the format ' 'YYYYMMDDTHHMMZ. If not provided, the blended file ' 'will take the latest available forecast reference ' 'time from the input cubes supplied.') parser.add_argument('--model_id_attr', metavar='MODEL_ID_ATTR', type=str, default="mosg__model_configuration", help='The name of the netCDF file attribute to be ' 'used to identify the source model for ' 'multi-model blends. Default assumes Met Office ' 'model metadata. Must be present on all input ' 'files if blending over models.') parser.add_argument('--spatial_weights_from_mask', action='store_true', default=False, help='If set this option will result in the generation' ' of spatially varying weights based on the' ' masks of the data we are blending. The' ' one dimensional weights are first calculated ' ' using the chosen weights calculation method,' ' but the weights will then be adjusted spatially' ' based on where there is masked data in the data' ' we are blending. The spatial weights are' ' calculated using the' ' SpatiallyVaryingWeightsFromMask plugin.') parser.add_argument('weighting_mode', metavar='WEIGHTED_BLEND_MODE', choices=['weighted_mean', 'weighted_maximum'], help='The method used in the weighted blend. ' '"weighted_mean": calculate a normal weighted' ' mean across the coordinate. 
' '"weighted_maximum": multiplies the values in the' ' coordinate by the weights, and then takes the' ' maximum.') parser.add_argument('input_filepaths', metavar='INPUT_FILES', nargs="+", help='Paths to input files to be blended.') parser.add_argument('output_filepath', metavar='OUTPUT_FILE', help='The output path for the processed NetCDF.') spatial = parser.add_argument_group( 'Spatial weights from mask options', 'Options for calculating the spatial weights using the ' 'SpatiallyVaryingWeightsFromMask plugin.') spatial.add_argument('--fuzzy_length', metavar='FUZZY_LENGTH', type=float, default=20000, help='When calculating spatially varying weights we' ' can smooth the weights so that areas close to' ' areas that are masked have lower weights than' ' those further away. This fuzzy length controls' ' the scale over which the weights are smoothed.' ' The fuzzy length is in terms of m, the' ' default is 20km. This distance is then' ' converted into a number of grid squares,' ' which does not have to be an integer. Assumes' ' the grid spacing is the same in the x and y' ' directions, and raises an error if this is not' ' true. See SpatiallyVaryingWeightsFromMask for' ' more detail.') linear = parser.add_argument_group( 'linear weights options', 'Options for the linear weights ' 'calculation in ' 'ChooseDefaultWeightsLinear') linear.add_argument('--y0val', metavar='LINEAR_STARTING_POINT', type=float, help='The relative value of the weighting start point ' '(lowest value of blend coord) for choosing default ' 'linear weights. This must be a positive float or 0.') linear.add_argument('--ynval', metavar='LINEAR_END_POINT', type=float, help='The relative value of the weighting ' 'end point (highest value of blend coord) for choosing' ' default linear weights. This must be a positive ' 'float or 0. Note that if blending over forecast ' 'reference time, ynval >= y0val would normally be ' 'expected (to give greater weight to the more recent ' 'forecast).') nonlinear = parser.add_argument_group( 'nonlinear weights options', 'Options for the non-linear ' 'weights calculation in ' 'ChooseDefaultWeightsNonLinear') nonlinear.add_argument('--cval', metavar='NON_LINEAR_FACTOR', type=float, help='Factor used to determine how skewed the ' 'non linear weights will be. ' 'A value of 1 implies equal weighting. If not ' 'set, a default value of cval=0.85 is set.') wts_dict = parser.add_argument_group( 'dict weights options', 'Options for linear weights to be ' 'calculated based on parameters ' 'read from a json file dict') wts_dict.add_argument('--wts_dict', metavar='WEIGHTS_DICTIONARY', help='Path to json file containing dictionary from ' 'which to calculate blending weights. Dictionary ' 'format is as specified in the improver.blending.' 'weights.ChooseWeightsLinear plugin.') wts_dict.add_argument('--weighting_coord', metavar='WEIGHTING_COORD', default='forecast_period', help='Name of ' 'coordinate over which linear weights should be ' 'scaled. 
This coordinate must be avilable in the ' 'weights dictionary.') args = parser.parse_args(args=argv) # if the linear weights method is called with non-linear args or vice # versa, exit with error if (args.wts_calc_method == "linear") and args.cval: parser.wrong_args_error('cval', 'linear') if ((args.wts_calc_method == "nonlinear") and np.any([args.y0val, args.ynval])): parser.wrong_args_error('y0val, ynval', 'non-linear') if (args.wts_calc_method == "dict") and not args.wts_dict: parser.error('Dictionary is required if --wts_calc_method="dict"') # set blending coordinate units if "time" in args.coordinate: coord_unit = Unit(args.coordinate_unit, args.calendar) elif args.coordinate_unit != 'hours since 1970-01-01 00:00:00.': coord_unit = args.coordinate_unit else: coord_unit = 'no_unit' # For blending across models, only blending across "model_id" is directly # supported. This is because the blending coordinate must be sortable, in # order to ensure that the data cube and the weights cube have coordinates # in the same order for blending. Whilst the model_configuration is # sortable itself, as it is associated with model_id, which is the # dimension coordinate, sorting the model_configuration coordinate can # result in the model_id coordinate becoming non-monotonic. As dimension # coordinates must be monotonic, this leads to the model_id coordinate # being demoted to an auxiliary coordinate. Therefore, for simplicity # model_id is used as the blending coordinate, instead of # model_configuration. # TODO: Support model_configuration as a blending coordinate directly. if args.coordinate == "model_configuration": blend_coord = "model_id" dict_coord = "model_configuration" else: blend_coord = args.coordinate dict_coord = args.coordinate # load cubes to be blended cubelist = load_cubelist(args.input_filepaths) # determine whether or not to equalise forecast periods for model # blending weights calculation weighting_coord = (args.weighting_coord if args.weighting_coord else "forecast_period") # prepare cubes for weighted blending merger = MergeCubesForWeightedBlending(blend_coord, weighting_coord=weighting_coord, model_id_attr=args.model_id_attr) cube = merger.process(cubelist, cycletime=args.cycletime) # if the coord for blending does not exist or has only one value, # update metadata only coord_names = [coord.name() for coord in cube.coords()] if (blend_coord not in coord_names) or (len( cube.coord(blend_coord).points) == 1): result = cube.copy() conform_metadata(result, cube, blend_coord, cycletime=args.cycletime) # raise a warning if this happened because the blend coordinate # doesn't exist if blend_coord not in coord_names: warnings.warn('Blend coordinate {} is not present on input ' 'data'.format(blend_coord)) # otherwise, calculate weights and blend across specified dimension else: weights = calculate_blending_weights( cube, blend_coord, args.wts_calc_method, wts_dict=args.wts_dict, weighting_coord=args.weighting_coord, coord_unit=coord_unit, y0val=args.y0val, ynval=args.ynval, cval=args.cval, dict_coord=dict_coord) if args.spatial_weights_from_mask: check_if_grid_is_equal_area(cube) grid_cells_x, _ = convert_distance_into_number_of_grid_cells( cube, args.fuzzy_length, int_grid_cells=False) SpatialWeightsPlugin = SpatiallyVaryingWeightsFromMask( grid_cells_x) weights = SpatialWeightsPlugin.process(cube, weights, blend_coord) # blend across specified dimension BlendingPlugin = WeightedBlendAcrossWholeDimension( blend_coord, args.weighting_mode, cycletime=args.cycletime) result = 
BlendingPlugin.process(cube, weights=weights) save_netcdf(result, args.output_filepath)
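
# A minimal, illustrative sketch of the linear default blending weights
# described in the --y0val/--ynval help above: weights vary linearly from
# y0val at the lowest blend-coordinate value to ynval at the highest, and
# are normalised to sum to one. This mirrors the documented behaviour of
# ChooseDefaultWeightsLinear; the plugin's exact implementation may differ.
def _linear_weights_sketch(num_points, y0val, ynval):
    import numpy as np
    if y0val < 0 or ynval < 0:
        raise ValueError('y0val and ynval must be positive floats or 0.')
    weights = np.linspace(y0val, ynval, num_points)
    return weights / weights.sum()
# e.g. blending three cycles with the newest weighted most heavily:
# _linear_weights_sketch(3, y0val=1.0, ynval=3.0)
#   -> approx. [0.17, 0.33, 0.5]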
def main(argv=None): """Load in arguments for applying neighbourhood processing when using a mask.""" parser = ArgParser( description='Neighbourhood the input dataset over two distinct regions' ' of land and sea. If performed as a single level neighbourhood, a ' 'land-sea mask should be provided. If instead topographic_zone ' 'neighbourhooding is being employed, the mask should be one of ' 'topographic zones. In the latter case a weights array is also needed' ' to collapse the topographic_zone coordinate. These weights are ' 'created with the improver generate-topography-bands-weights CLI and ' 'should be made using a land-sea mask, which will then be employed ' 'within this code to draw the distinction between the two surface ' 'types.') parser.add_argument('input_filepath', metavar='INPUT_FILE', help='A path to an input NetCDF file to be processed.') parser.add_argument('input_mask_filepath', metavar='INPUT_MASK', help=('A path to an input NetCDF file containing ' 'either a mask of topographic zones over land ' 'or a land-sea mask.')) parser.add_argument('output_filepath', metavar='OUTPUT_FILE', help='The output path for the processed NetCDF.') mask_group = parser.add_argument_group( 'Collapse weights - required if using a topographic zones mask') mask_group.add_argument('--weights_for_collapsing_dim', metavar='WEIGHTS', default=None, help='A path to an weights NetCDF file containing ' 'the weights which are used for collapsing the ' 'dimension gained through masking. These weights ' 'must have been created using a land-sea mask.') radius_group = parser.add_argument_group( 'Neighbourhooding Radius - Set only one of the options') group = radius_group.add_mutually_exclusive_group() group.add_argument('--radius', metavar='RADIUS', type=float, help='The radius (in m) for neighbourhood processing.') group.add_argument('--radii-by-lead-time', metavar=('RADII_BY_LEAD_TIME', 'LEAD_TIME_IN_HOURS'), nargs=2, help='The radii for neighbourhood processing ' 'and the associated lead times at which the radii are ' 'valid. The radii are in metres whilst the lead time ' 'has units of hours. The radii and lead times are ' 'expected as individual comma-separated lists with ' 'the list of radii given first followed by a list of ' 'lead times to indicate at what lead time each radii ' 'should be used. For example: 10000,12000,14000 1,2,3 ' 'where a lead time of 1 hour uses a radius of 10000m, ' 'a lead time of 2 hours uses a radius of 12000m, etc.') parser.add_argument('--sum_or_fraction', default="fraction", choices=["sum", "fraction"], help='The neighbourhood output can either be in the ' 'form of a sum of the neighbourhood, or a ' 'fraction calculated by dividing the sum of the ' 'neighbourhood by the neighbourhood area. ' '"fraction" is the default option.') parser.add_argument('--intermediate_filepath', default=None, help='Intermediate filepath for results following ' 'topographic masked neighbourhood processing of ' 'land points and prior to collapsing the ' 'topographic_zone coordinate. 
Intermediate files ' 'will not be produced if no topographic masked ' 'neighbourhood processing occurs.') args = parser.parse_args(args=argv) cube = load_cube(args.input_filepath) mask = load_cube(args.input_mask_filepath, no_lazy_load=True) weights = None if any([ 'topographic_zone' in coord.name() for coord in mask.coords(dim_coords=True) ]): if mask.attributes['topographic_zones_include_seapoints'] == 'True': raise ValueError('The topographic zones mask cube must have been ' 'masked to exclude sea points, but ' 'topographic_zones_include_seapoints = True') if not args.weights_for_collapsing_dim: raise IOError('A weights cube must be provided if using a mask ' 'of topographic zones to collapse the resulting ' 'vertical dimension.') weights = load_cube(args.weights_for_collapsing_dim, no_lazy_load=True) result, intermediate_cube = process(cube, mask, args.radius, args.radii_by_lead_time, weights, args.sum_or_fraction, args.intermediate_filepath) save_netcdf(result, args.output_filepath) if args.intermediate_filepath: save_netcdf(intermediate_cube, args.intermediate_filepath)
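
# An illustration of the --sum_or_fraction option above, assuming a simple
# square neighbourhood measured in grid squares (the real plugins work with
# radii in metres, masks and lead times): the "fraction" output is the
# neighbourhood sum divided by the neighbourhood area, which for a uniform
# square kernel is just a mean filter.
def _neighbourhood_fraction_sketch(data, width=3):
    from scipy.ndimage import uniform_filter
    fraction = uniform_filter(data, size=width, mode='constant')
    neighbourhood_sum = fraction * width ** 2
    return fraction, neighbourhood_sum
# e.g. for a 5x5 field of zeros with a single 1.0 at the centre and
# width=3, the central fraction is 1/9 (~0.111) and the central sum is 1.0.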
def main(argv=None): """Load in arguments and ensure they are set correctly. Then load in the data to blend and calculate default weights using the method chosen before carrying out the blending.""" parser = ArgParser( description='Calculate the default weights to apply in weighted ' 'blending plugins using the ChooseDefaultWeightsLinear or ' 'ChooseDefaultWeightsNonLinear plugins. Then apply these ' 'weights to the dataset using the BasicWeightedAverage plugin.' ' Required for ChooseDefaultWeightsLinear: y0val and ynval.' ' Required for ChooseDefaultWeightsNonLinear: cval.' ' Required for ChooseWeightsLinear with dict: wts_dict.') parser.add_argument('--wts_calc_method', metavar='WEIGHTS_CALCULATION_METHOD', choices=['linear', 'nonlinear', 'dict'], default='linear', help='Method to use to calculate ' 'weights used in blending. "linear" (default): ' 'calculate linearly varying blending weights. ' '"nonlinear": calculate blending weights that decrease' ' exponentially with increasing blending coordinate. ' '"dict": calculate weights using a dictionary passed ' 'in as a command line argument.') parser.add_argument('coordinate', type=str, metavar='COORDINATE_TO_AVERAGE_OVER', help='The coordinate over which the blending ' 'will be applied.') parser.add_argument('--cycletime', metavar='CYCLETIME', type=str, help='The forecast reference time to be used after ' 'blending has been applied, in the format ' 'YYYYMMDDTHHMMZ. If not provided, the blended file ' 'will take the latest available forecast reference ' 'time from the input cubes supplied.') parser.add_argument('--model_id_attr', metavar='MODEL_ID_ATTR', type=str, default=None, help='The name of the netCDF file attribute to be ' 'used to identify the source model for ' 'multi-model blends. Default is None. ' 'Must be present on all input ' 'files if blending over models.') parser.add_argument('--spatial_weights_from_mask', action='store_true', default=False, help='If set this option will result in the generation' ' of spatially varying weights based on the' ' masks of the data we are blending. The' ' one dimensional weights are first calculated ' ' using the chosen weights calculation method,' ' but the weights will then be adjusted spatially' ' based on where there is masked data in the data' ' we are blending. The spatial weights are' ' calculated using the' ' SpatiallyVaryingWeightsFromMask plugin.') parser.add_argument('input_filepaths', metavar='INPUT_FILES', nargs="+", help='Paths to input files to be blended.') parser.add_argument('output_filepath', metavar='OUTPUT_FILE', help='The output path for the processed NetCDF.') spatial = parser.add_argument_group( 'Spatial weights from mask options', 'Options for calculating the spatial weights using the ' 'SpatiallyVaryingWeightsFromMask plugin.') spatial.add_argument('--fuzzy_length', metavar='FUZZY_LENGTH', type=float, default=20000, help='When calculating spatially varying weights we' ' can smooth the weights so that areas close to' ' areas that are masked have lower weights than' ' those further away. This fuzzy length controls' ' the scale over which the weights are smoothed.' ' The fuzzy length is in terms of m, the' ' default is 20km. This distance is then' ' converted into a number of grid squares,' ' which does not have to be an integer. Assumes' ' the grid spacing is the same in the x and y' ' directions, and raises an error if this is not' ' true. 
See SpatiallyVaryingWeightsFromMask for' ' more detail.') linear = parser.add_argument_group( 'linear weights options', 'Options for the linear weights ' 'calculation in ' 'ChooseDefaultWeightsLinear') linear.add_argument('--y0val', metavar='LINEAR_STARTING_POINT', type=float, help='The relative value of the weighting start point ' '(lowest value of blend coord) for choosing default ' 'linear weights. This must be a positive float or 0.') linear.add_argument('--ynval', metavar='LINEAR_END_POINT', type=float, help='The relative value of the weighting ' 'end point (highest value of blend coord) for choosing' ' default linear weights. This must be a positive ' 'float or 0. Note that if blending over forecast ' 'reference time, ynval >= y0val would normally be ' 'expected (to give greater weight to the more recent ' 'forecast).') nonlinear = parser.add_argument_group( 'nonlinear weights options', 'Options for the non-linear ' 'weights calculation in ' 'ChooseDefaultWeightsNonLinear') nonlinear.add_argument('--cval', metavar='NON_LINEAR_FACTOR', type=float, help='Factor used to determine how skewed the ' 'non linear weights will be. A value of 1 ' 'implies equal weighting.') wts_dict = parser.add_argument_group( 'dict weights options', 'Options for linear weights to be ' 'calculated based on parameters ' 'read from a json file dict') wts_dict.add_argument('--wts_dict', metavar='WEIGHTS_DICTIONARY', help='Path to json file containing dictionary from ' 'which to calculate blending weights. Dictionary ' 'format is as specified in the improver.blending.' 'weights.ChooseWeightsLinear plugin.') wts_dict.add_argument('--weighting_coord', metavar='WEIGHTING_COORD', default='forecast_period', help='Name of ' 'coordinate over which linear weights should be ' 'scaled. This coordinate must be available in the ' 'weights dictionary.') args = parser.parse_args(args=argv) # reject incorrect argument combinations if (args.wts_calc_method == "linear") and args.cval: parser.wrong_args_error('cval', 'linear') if ((args.wts_calc_method == "nonlinear") and np.any([args.y0val, args.ynval])): parser.wrong_args_error('y0val, ynval', 'non-linear') if (args.wts_calc_method == "dict") and not args.wts_dict: parser.error('Dictionary is required if --wts_calc_method="dict"') weights_dict = load_json_or_none(args.wts_dict) # Load cubes to be blended. cubelist = load_cubelist(args.input_filepaths) result = process(cubelist, args.wts_calc_method, args.coordinate, args.cycletime, args.weighting_coord, weights_dict, args.y0val, args.ynval, args.cval, args.model_id_attr, args.spatial_weights_from_mask, args.fuzzy_length) save_netcdf(result, args.output_filepath)
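
# A minimal, illustrative sketch of the non-linear default weights described
# in the --cval help above: successive powers of cval, normalised to sum to
# one, so weights decrease exponentially along the blend coordinate and
# cval=1 gives equal weighting. This mirrors the documented behaviour of
# ChooseDefaultWeightsNonLinear; the plugin itself may differ in detail.
def _nonlinear_weights_sketch(num_points, cval=0.85):
    import numpy as np
    weights = cval ** np.arange(num_points)
    return weights / weights.sum()
# e.g. _nonlinear_weights_sketch(4) -> approx. [0.31, 0.27, 0.23, 0.19],
# while _nonlinear_weights_sketch(4, cval=1.0) -> [0.25, 0.25, 0.25, 0.25].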
def main(argv=None): """Load in arguments for applying neighbourhood processing when using a mask.""" parser = ArgParser( description='Neighbourhood the input dataset over two distinct regions' ' of land and sea. If performed as a single level neighbourhood, a ' 'land-sea mask should be provided. If instead topographic_zone ' 'neighbourhooding is being employed, the mask should be one of ' 'topographic zones. In the latter case a weights array is also needed' ' to collapse the topographic_zone coordinate. These weights are ' 'created with the improver generate-topography-bands-weights CLI and ' 'should be made using a land-sea mask, which will then be employed ' 'within this code to draw the distinction between the two surface ' 'types.') parser.add_argument('input_filepath', metavar='INPUT_FILE', help='A path to an input NetCDF file to be processed.') parser.add_argument('input_mask_filepath', metavar='INPUT_MASK', help=('A path to an input NetCDF file containing ' 'either a mask of topographic zones over land ' 'or a land-sea mask.')) parser.add_argument('output_filepath', metavar='OUTPUT_FILE', help='The output path for the processed NetCDF.') mask_group = parser.add_argument_group( 'Collapse weights - required if using a topographic zones mask') mask_group.add_argument('--weights_for_collapsing_dim', metavar='WEIGHTS', default=None, help='A path to an weights NetCDF file containing ' 'the weights which are used for collapsing the ' 'dimension gained through masking. These weights ' 'must have been created using a land-sea mask.') radius_group = parser.add_argument_group( 'Neighbourhooding Radius - Set only one of the options') group = radius_group.add_mutually_exclusive_group() group.add_argument('--radius', metavar='RADIUS', type=float, help='The radius (in m) for neighbourhood processing.') group.add_argument('--radii-by-lead-time', metavar=('RADII_BY_LEAD_TIME', 'LEAD_TIME_IN_HOURS'), nargs=2, help='The radii for neighbourhood processing ' 'and the associated lead times at which the radii are ' 'valid. The radii are in metres whilst the lead time ' 'has units of hours. The radii and lead times are ' 'expected as individual comma-separated lists with ' 'the list of radii given first followed by a list of ' 'lead times to indicate at what lead time each radii ' 'should be used. For example: 10000,12000,14000 1,2,3 ' 'where a lead time of 1 hour uses a radius of 10000m, ' 'a lead time of 2 hours uses a radius of 12000m, etc.') parser.add_argument('--sum_or_fraction', default="fraction", choices=["sum", "fraction"], help='The neighbourhood output can either be in the ' 'form of a sum of the neighbourhood, or a ' 'fraction calculated by dividing the sum of the ' 'neighbourhood by the neighbourhood area. ' '"fraction" is the default option.') parser.add_argument('--intermediate_filepath', default=None, help='Intermediate filepath for results following ' 'topographic masked neighbourhood processing of ' 'land points and prior to collapsing the ' 'topographic_zone coordinate. 
Intermediate files ' 'will not be produced if no topographic masked ' 'neighbourhood processing occurs.') args = parser.parse_args(args=argv) cube = load_cube(args.input_filepath) mask = load_cube(args.input_mask_filepath, no_lazy_load=True) masking_coordinate = None if any([ 'topographic_zone' in coord.name() for coord in mask.coords(dim_coords=True) ]): if mask.attributes['topographic_zones_include_seapoints'] == 'True': raise ValueError('The topographic zones mask cube must have been ' 'masked to exclude sea points, but ' 'topographic_zones_include_seapoints = True') if not args.weights_for_collapsing_dim: raise IOError('A weights cube must be provided if using a mask ' 'of topographic zones to collapse the resulting ' 'vertical dimension.') weights = load_cube(args.weights_for_collapsing_dim, no_lazy_load=True) if weights.attributes['topographic_zones_include_seapoints'] == 'True': raise ValueError('The weights cube must be masked to exclude sea ' 'points, but topographic_zones_include_seapoints ' '= True') masking_coordinate = 'topographic_zone' landmask = weights[0].copy(data=weights[0].data.mask) landmask.rename('land_binary_mask') landmask.remove_coord(masking_coordinate) # Create land and sea masks in IMPROVER format (inverse of # numpy standard) 1 - include this region, 0 - exclude this region. land_only = landmask.copy( data=np.logical_not(landmask.data).astype(int)) sea_only = landmask.copy(data=landmask.data.astype(int)) else: if args.weights_for_collapsing_dim: warnings.warn('A weights cube has been provided but will not be ' 'used as there is no topographic zone coordinate ' 'to collapse.') landmask = mask # In this case the land is set to 1 and the sea is set to 0 in the # input mask. sea_only = landmask.copy( data=np.logical_not(landmask.data).astype(int)) land_only = landmask.copy(data=landmask.data.astype(int)) if args.radius: radius_or_radii = args.radius lead_times = None elif args.radii_by_lead_time: radius_or_radii = args.radii_by_lead_time[0].split(",") lead_times = args.radii_by_lead_time[1].split(",") if args.intermediate_filepath is not None and masking_coordinate is None: msg = ('No topographic_zone coordinate found, so no intermediate file ' 'will be saved.') warnings.warn(msg) # Section for neighbourhood processing land points. if land_only.data.max() > 0.0: if masking_coordinate is not None: result_land = ApplyNeighbourhoodProcessingWithAMask( masking_coordinate, radius_or_radii, lead_times=lead_times, sum_or_fraction=args.sum_or_fraction, re_mask=False).process(cube, mask) else: result_land = NeighbourhoodProcessing( 'square', radius_or_radii, lead_times=lead_times, sum_or_fraction=args.sum_or_fraction, re_mask=True).process(cube, land_only) if masking_coordinate is not None: if args.intermediate_filepath is not None: save_netcdf(result_land, args.intermediate_filepath) # Collapse the masking coordinate. result_land = CollapseMaskedNeighbourhoodCoordinate( masking_coordinate, weights=weights).process(result_land) result = result_land # Section for neighbourhood processing sea points. if sea_only.data.max() > 0.0: result_sea = NeighbourhoodProcessing( 'square', radius_or_radii, lead_times=lead_times, sum_or_fraction=args.sum_or_fraction, re_mask=True).process(cube, sea_only) result = result_sea # Section for combining land and sea points following land and sea points # being neighbourhood processed individually. if sea_only.data.max() > 0.0 and land_only.data.max() > 0.0: # Recombine cubes to be a single output. 
combined_data = result_land.data.filled(0) + result_sea.data.filled(0) result = result_land.copy(data=combined_data) save_netcdf(result, args.output_filepath)
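
# An illustration of the recombination step above: land and sea are
# processed separately with the other region masked out, then the two
# masked results are summed with masked points contributing zero via
# filled(0). For example, with numpy masked arrays:
#   land = np.ma.masked_array([1., 2.], mask=[False, True])
#   sea = np.ma.masked_array([9., 5.], mask=[True, False])
#   land.filled(0) + sea.filled(0) -> array([1., 5.])
def _recombine_land_sea_sketch(result_land, result_sea):
    combined_data = result_land.data.filled(0) + result_sea.data.filled(0)
    return result_land.copy(data=combined_data)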
def main(argv=None): """ Load in the arguments and apply the requested variant of Ensemble Copula Coupling for converting percentile data to realizations. """ parser = ArgParser( description='Convert a dataset containing ' 'probabilities into one containing ' 'ensemble realizations using Ensemble Copula Coupling.') # General options: parser.add_argument('input_filepath', metavar='INPUT_FILE', help='A path to an input NetCDF file to be processed.' ' Must contain a percentile dimension.') parser.add_argument('output_filepath', metavar='OUTPUT_FILE', help='The output path for the processed NetCDF.') parser.add_argument('--no_of_percentiles', default=None, type=int, metavar='NUMBER_OF_PERCENTILES', help='The number of percentiles to be generated. ' 'This is also equal to the number of ensemble ' 'realizations that will be generated.') parser.add_argument('--sampling_method', default='quantile', const='quantile', nargs='?', choices=['quantile', 'random'], metavar='PERCENTILE_SAMPLING_METHOD', help='Method to be used for generating the list of ' 'percentiles with forecasts generated at each ' 'percentile. The options are "quantile" and ' '"random". "quantile" is the default option. ' 'The "quantile" option produces equally spaced ' 'percentiles which is the preferred ' 'option for full Ensemble Copula Coupling with ' 'reordering enabled.') parser.add_argument( '--ecc_bounds_warning', default=False, action='store_true', help='If True, where percentiles (calculated as an intermediate ' 'output before realizations) exceed the ECC bounds range, raise ' 'a warning rather than an exception.') # Different use cases: # (We can either reorder OR rebadge) group = parser.add_mutually_exclusive_group(required=True) group.add_argument('--reordering', default=False, action='store_true', help='The option used to create ensemble realizations ' 'from percentiles by reordering the input ' 'percentiles based on the order of the ' 'raw ensemble forecast.') group.add_argument('--rebadging', default=False, action='store_true', help='The option used to create ensemble realizations ' 'from percentiles by rebadging the input ' 'percentiles.') # If reordering, can do so either based on original realizations, # or randomly. reordering = parser.add_argument_group( 'Reordering options', 'Options for reordering the input percentiles ' 'using the raw ensemble forecast as required to create ensemble ' 'realizations.') reordering.add_argument('--raw_forecast_filepath', metavar='RAW_FORECAST_FILE', help='A path to an raw forecast NetCDF file to be ' 'processed. This option is compulsory, if the ' 'reordering option is selected.') reordering.add_argument('--random_ordering', default=False, action='store_true', help='Decide whether or not to use random ' 'ordering within the ensemble reordering step.') reordering.add_argument( '--random_seed', default=None, help='Option to specify a value for the random seed for testing ' 'purposes, otherwise, the default random seed behaviour is ' 'utilised. 
The random seed is used in the generation of the ' 'random numbers used for either the random_ordering option to ' 'order the input percentiles randomly, rather than use the ' 'ordering from the raw ensemble, or for splitting tied values ' 'within the raw ensemble, so that the values from the input ' 'percentiles can be ordered to match the raw ensemble.') rebadging = parser.add_argument_group( 'Rebadging options', 'Options for rebadging the input percentiles ' 'as ensemble realizations.') rebadging.add_argument('--realization_numbers', default=None, metavar='REALIZATION_NUMBERS', nargs="+", help='A list of ensemble realization numbers to ' 'use when rebadging the percentiles ' 'into realizations.') args = parser.parse_args(args=argv) # CLI argument checking: # Can only do one of reordering or rebadging: if options are passed which # correspond to the opposite method, raise an exception. # Note: Shouldn't need to check that both/none are set, since they are # defined as mandatory, but mutually exclusive, options. if args.reordering: if args.realization_numbers is not None: parser.wrong_args_error('realization_numbers', 'reordering') if args.rebadging: if ((args.raw_forecast_filepath is not None) or (args.random_ordering is not False)): parser.wrong_args_error( 'raw_forecast_filepath, random_ordering', 'rebadging') # Convert the string of realization_numbers to a list of ints. realization_numbers = None if args.rebadging: if args.realization_numbers is not None: realization_numbers = ( [int(num) for num in args.realization_numbers]) cube = load_cube(args.input_filepath) raw_forecast = load_cube(args.raw_forecast_filepath, allow_none=True) # Process Cube result_cube = process(cube, raw_forecast, args.no_of_percentiles, args.sampling_method, args.ecc_bounds_warning, args.reordering, args.rebadging, args.random_ordering, args.random_seed, realization_numbers) # Save Cube save_netcdf(result_cube, args.output_filepath)
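
# A minimal sketch of ECC reordering at a single point: the sorted
# percentile values are assigned according to the rank order of the raw
# ensemble members, so the output realizations inherit the raw ensemble's
# rank structure. EnsembleReordering does this cube-wide, with optional
# random ordering and random splitting of ties; this shows only the core
# idea.
def _ecc_reorder_sketch(percentile_values, raw_members):
    import numpy as np
    ranks = np.argsort(np.argsort(raw_members))  # rank of each raw member
    return np.sort(percentile_values)[ranks]
# e.g. _ecc_reorder_sketch([271., 274., 278.], [275.2, 270.1, 273.9])
#   -> array([278., 271., 274.]): the largest calibrated value is assigned
#   to the member that was largest in the raw ensemble.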
def main(argv=None): """Extrapolate data forward in time.""" parser = ArgParser( description="Extrapolate input data to required lead times.") parser.add_argument("input_filepath", metavar="INPUT_FILEPATH", type=str, help="Path to input NetCDF file.") group = parser.add_mutually_exclusive_group() group.add_argument("--output_dir", metavar="OUTPUT_DIR", type=str, default="", help="Directory to write output files.") group.add_argument("--output_filepaths", nargs="+", type=str, help="List of full paths to output nowcast files, in " "order of increasing lead time.") optflw = parser.add_argument_group('Advect using files containing the x ' ' and y components of the velocity') optflw.add_argument("--eastward_advection_filepath", type=str, help="Path" " to input file containing Eastward advection " "velocities.") optflw.add_argument("--northward_advection_filepath", type=str, help="Path" " to input file containing Northward advection " "velocities.") speed = parser.add_argument_group('Advect using files containing speed and' ' direction') speed.add_argument("--advection_speed_filepath", type=str, help="Path" " to input file containing advection speeds," " usually wind speeds, on multiple pressure levels.") speed.add_argument("--advection_direction_filepath", type=str, help="Path to input file containing the directions from" " which advection speeds are coming (180 degrees from" " the direction in which the speed is directed). The" " directions should be on the same grid as the input" " speeds, including the same vertical levels.") speed.add_argument("--pressure_level", type=int, default=75000, help="The" " pressure level in Pa to extract from the multi-level" " advection_speed and advection_direction files. The" " velocities at this level are used for advection.") parser.add_argument("--orographic_enhancement_filepaths", nargs="+", type=str, default=None, help="List or wildcarded " "file specification to the input orographic " "enhancement files. Orographic enhancement files are " "compulsory for precipitation fields.") parser.add_argument("--json_file", metavar="JSON_FILE", default=None, help="Filename for the json file containing " "required changes to the metadata. Information " "describing the intended contents of the json file " "is available in " "improver.utilities.cube_metadata.amend_metadata." "Every output cube will have the metadata_dict " "applied. Defaults to None.", type=str) parser.add_argument("--max_lead_time", type=int, default=360, help="Maximum lead time required (mins).") parser.add_argument("--lead_time_interval", type=int, default=15, help="Interval between required lead times (mins).") accumulation_args = parser.add_argument_group( 'Calculate accumulations from advected fields') accumulation_args.add_argument( "--accumulation_fidelity", type=int, default=0, help="If set, this CLI will additionally return accumulations" " calculated from the advected fields. This fidelity specifies the" " time interval in minutes between advected fields that is used to" " calculate these accumulations. This interval must be a factor of" " the lead_time_interval.") accumulation_args.add_argument( "--accumulation_units", type=str, default='m', help="Desired units in which the accumulations should be expressed," "e.g. 
mm") args = parser.parse_args(args=argv) upath, vpath = (args.eastward_advection_filepath, args.northward_advection_filepath) spath, dpath = (args.advection_speed_filepath, args.advection_direction_filepath) # load files and initialise advection plugin input_cube = load_cube(args.input_filepath) if (upath and vpath) and not (spath or dpath): ucube = load_cube(upath) vcube = load_cube(vpath) elif (spath and dpath) and not (upath or vpath): level_constraint = Constraint(pressure=args.pressure_level) try: scube = load_cube(spath, constraints=level_constraint) dcube = load_cube(dpath, constraints=level_constraint) except ValueError as err: raise ValueError( '{} Unable to extract specified pressure level from given ' 'speed and direction files.'.format(err)) ucube, vcube = ResolveWindComponents().process(scube, dcube) else: raise ValueError('Cannot mix advection component velocities with speed' ' and direction') oe_cube = None if args.orographic_enhancement_filepaths: oe_cube = load_cube(args.orographic_enhancement_filepaths) metadata_dict = None if args.json_file: # Load JSON file for metadata amendments. with open(args.json_file, 'r') as input_file: metadata_dict = json.load(input_file) # generate list of lead times in minutes lead_times = np.arange(0, args.max_lead_time+1, args.lead_time_interval) if args.output_filepaths: if len(args.output_filepaths) != len(lead_times): raise ValueError("Require exactly one output file name for each " "forecast lead time") # determine whether accumulations are also to be returned. time_interval = args.lead_time_interval if args.accumulation_fidelity > 0: fraction, _ = np.modf(args.lead_time_interval / args.accumulation_fidelity) if fraction != 0: msg = ("The specified lead_time_interval ({}) is not cleanly " "divisible by the specified accumulation_fidelity ({}). As " "a result the lead_time_interval cannot be constructed from" " accumulation cubes at this fidelity.".format( args.lead_time_interval, args.accumulation_fidelity)) raise ValueError(msg) time_interval = args.accumulation_fidelity lead_times = np.arange(0, args.max_lead_time+1, time_interval) lead_time_filter = args.lead_time_interval // time_interval forecast_plugin = CreateExtrapolationForecast( input_cube, ucube, vcube, orographic_enhancement_cube=oe_cube, metadata_dict=metadata_dict) # extrapolate input data to required lead times forecast_cubes = iris.cube.CubeList() for i, lead_time in enumerate(lead_times): forecast_cubes.append( forecast_plugin.extrapolate(leadtime_minutes=lead_time)) # return rate cubes for i, cube in enumerate(forecast_cubes[::lead_time_filter]): # save to a suitably-named output file if args.output_filepaths: file_name = args.output_filepaths[i] else: file_name = os.path.join( args.output_dir, generate_file_name(cube)) save_netcdf(cube, file_name) # calculate accumulations if required if args.accumulation_fidelity > 0: plugin = Accumulation(accumulation_units=args.accumulation_units, accumulation_period=args.lead_time_interval * 60) accumulation_cubes = plugin.process(forecast_cubes) # return accumulation cubes for i, cube in enumerate(accumulation_cubes): file_name = os.path.join(args.output_dir, generate_file_name(cube)) save_netcdf(cube, file_name)
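
# An illustrative conversion from speed and "direction from which"
# (degrees, meteorological convention, as described in the
# --advection_direction_filepath help above) to eastward and northward
# components. This is the kind of calculation ResolveWindComponents
# performs on cubes, which additionally accounts for grid orientation;
# the sign convention here is an assumption of this sketch.
def _speed_direction_to_uv_sketch(speed, direction_from_degrees):
    import numpy as np
    theta = np.deg2rad(direction_from_degrees)
    u = -speed * np.sin(theta)  # eastward component
    v = -speed * np.cos(theta)  # northward component
    return u, v
# e.g. a 10 m/s wind from 270 degrees (a westerly) gives u=10, v=0:
# it blows towards the east.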
def main(argv=None): """ Standardise a source cube. Available options are regridding (bilinear or nearest-neighbour, optionally with land-mask awareness), updating meta-data and converting float64 data to float32. A check for float64 data compliance can be made by only specify a source NetCDF file with no other arguments. """ parser = ArgParser( description='Standardise a source data cube. Three main options are ' 'available; fixing float64 data, regridding and updating ' 'metadata. If regridding then additional options are ' 'available to use bilinear or nearest-neighbour ' '(optionally with land-mask awareness) modes. If only a ' 'source file is specified with no other arguments, then ' 'an exception will be raised if float64 data are found on ' 'the source.') parser.add_argument('source_data_filepath', metavar='SOURCE_DATA', help='A cube of data that is to be standardised and ' 'optionally fixed for float64 data, regridded ' 'and meta data changed') parser.add_argument("--output_filepath", metavar="OUTPUT_FILE", default=None, help="The output path for the processed NetCDF. " "If only a source file is specified and no " "output file, then the source will be checked" "for float64 data.") regrid_group = parser.add_argument_group("Regridding options") regrid_group.add_argument( "--target_grid_filepath", metavar="TARGET_GRID", help=('If specified then regridding of the source ' 'against the target grid is enabled. If also using ' 'landmask-aware regridding, then this must be land_binary_mask ' 'data.')) regrid_group.add_argument( "--regrid_mode", default='bilinear', choices=['bilinear', 'nearest', 'nearest-with-mask'], help=('Selects which regridding technique to use. Default uses ' 'iris.analysis.Linear(); "nearest" uses Nearest() (Use for less ' 'continuous fields, e.g. precipitation.); "nearest-with-mask" ' 'ensures that target data are sourced from points with the same ' 'mask value (Use for coast-line-dependent variables like ' 'temperature).')) regrid_group.add_argument( "--extrapolation_mode", default='nanmask', help='Mode to use for extrapolating data into regions ' 'beyond the limits of the source_data domain. ' 'Refer to online documentation for iris.analysis. ' 'Modes are: ' 'extrapolate - The extrapolation points will ' 'take their value from the nearest source point. ' 'nan - The extrapolation points will be be ' 'set to NaN. ' 'error - A ValueError exception will be raised, ' 'notifying an attempt to extrapolate. ' 'mask - The extrapolation points will always be ' 'masked, even if the source data is not a ' 'MaskedArray. ' 'nanmask - If the source data is a MaskedArray ' 'the extrapolation points will be masked. ' 'Otherwise they will be set to NaN. ' 'Defaults to nanmask.') regrid_group.add_argument( "--input_landmask_filepath", metavar="INPUT_LANDMASK_FILE", help=("A path to a NetCDF file describing the land_binary_mask on " "the source-grid if coastline-aware regridding is required.")) regrid_group.add_argument( "--landmask_vicinity", metavar="LANDMASK_VICINITY", default=25000., type=float, help=("Radius of vicinity to search for a coastline, in metres. " "Default value; 25000 m")) parser.add_argument("--fix_float64", action='store_true', default=False, help="Check and fix cube for float64 data. Without " "this option an exception will be raised if " "float64 data is found but no fix applied.") parser.add_argument("--json_file", metavar="JSON_FILE", default=None, help='Filename for the json file containing required ' 'changes that will be applied ' 'to the metadata. 
Defaults to None.') args = parser.parse_args(args=argv) if args.target_grid_filepath or args.json_file or args.fix_float64: if not args.output_filepath: msg = ("An argument has been specified that requires an output " "filepath but none has been provided") raise ValueError(msg) if (args.input_landmask_filepath and "nearest-with-mask" not in args.regrid_mode): msg = ("Land-mask file supplied without appropriate regrid_mode. " "Use --regrid_mode=nearest-with-mask.") raise ValueError(msg) if args.input_landmask_filepath and not args.target_grid_filepath: msg = ("Cannot specify input_landmask_filepath without " "target_grid_filepath") raise ValueError(msg) # Load Cube and json metadata_dict = load_json_or_none(args.json_file) # source file data path is a mandatory argument output_data = load_cube(args.source_data_filepath) target_grid = None source_landsea = None if args.target_grid_filepath: target_grid = load_cube(args.target_grid_filepath) if args.regrid_mode in ["nearest-with-mask"]: if not args.input_landmask_filepath: msg = ("An argument has been specified that requires an input " "landmask filepath but none has been provided") raise ValueError(msg) source_landsea = load_cube(args.input_landmask_filepath) # Process Cube output_data = process(output_data, target_grid, source_landsea, metadata_dict, args.regrid_mode, args.extrapolation_mode, args.landmask_vicinity, args.fix_float64) # Save Cube if args.output_filepath: save_netcdf(output_data, args.output_filepath)
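
# A minimal sketch of the float64 check-and-fix behaviour described above:
# without --fix_float64 the presence of float64 data is an error; with it,
# the data is demoted to float32. The real check in IMPROVER may also
# inspect coordinates; this sketch looks only at the cube's data payload.
def _check_or_fix_float64_sketch(cube, fix=False):
    import numpy as np
    if cube.dtype == np.float64:
        if not fix:
            raise TypeError(
                'float64 data found in {}'.format(cube.name()))
        cube.data = cube.data.astype(np.float32)
    return cube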
def main(argv=None): """Load in arguments and start spotdata extraction process.""" parser = ArgParser( description="Extract diagnostic data from gridded fields for spot data" " sites. It is possible to apply a temperature lapse rate adjustment" " to temperature data that helps to account for differences between" " the spot sites real altitude and that of the grid point from which" " the temperature data is extracted.") # Input and output files required. parser.add_argument("neighbour_filepath", metavar="NEIGHBOUR_FILEPATH", help="Path to a NetCDF file of spot-data neighbours. " "This file also contains the spot site information.") parser.add_argument("diagnostic_filepath", metavar="DIAGNOSTIC_FILEPATH", help="Path to a NetCDF file containing the diagnostic " "data to be extracted.") parser.add_argument("temperature_lapse_rate_filepath", metavar="LAPSE_RATE_FILEPATH", nargs='?', help="(Optional) Filepath to a NetCDF file containing" " temperature lapse rates. If this cube is provided," " and a screen temperature cube is being processed," " the lapse rates will be used to adjust the" " temperatures to better represent each spot's" " site-altitude.") parser.add_argument("output_filepath", metavar="OUTPUT_FILEPATH", help="The output path for the resulting NetCDF") parser.add_argument( "--apply_lapse_rate_correction", default=False, action="store_true", help="If the option is set and a lapse rate cube has been " "provided, extracted screen temperatures will be adjusted to " "better match the altitude of the spot site for which they have " "been extracted.") method_group = parser.add_argument_group( title="Neighbour finding method", description="If none of these options are set, the nearest grid point " "to a spot site will be used without any other constraints.") method_group.add_argument( "--land_constraint", default=False, action='store_true', help="If set the neighbour cube will be interrogated for grid point" " neighbours that were identified using a land constraint. This means" " that the grid points should be land points except for sites where" " none were found within the search radius when the neighbour cube was" " created. May be used with minimum_dz.") method_group.add_argument( "--minimum_dz", default=False, action='store_true', help="If set the neighbour cube will be interrogated for grid point" " neighbours that were identified using a minimum height difference" " constraint. These are grid points that were found to be the closest" " in altitude to the spot site within the search radius defined when" " the neighbour cube was created. May be used with land_constraint.") percentile_group = parser.add_argument_group( title="Extract percentiles", description="Extract particular percentiles from probabilistic, " "percentile, or realization inputs. If deterministic input is " "provided a warning is raised and all leading dimensions are included " "in the returned spot-data cube.") percentile_group.add_argument( "--extract_percentiles", default=None, nargs='+', type=int, help="If set to a percentile value or a list of percentile values, " "data corresponding to those percentiles will be returned. For " "example setting '--extract_percentiles 25 50 75' will result in the " "25th, 50th, and 75th percentiles being returned from a cube of " "probabilities, percentiles, or realizations. 
Note that for " "percentile inputs, the desired percentile(s) must exist in the input " "cube.") parser.add_argument( "--ecc_bounds_warning", default=False, action="store_true", help="If True, where calculated percentiles are outside the ECC " "bounds range, raise a warning rather than an exception.") meta_group = parser.add_argument_group("Metadata") meta_group.add_argument( "--metadata_json", metavar="METADATA_JSON", default=None, help="If provided, this JSON file can be used to modify the metadata " "of the returned netCDF file. Defaults to None.") output_group = parser.add_argument_group("Suppress Verbose output") # This CLI may be used to prepare data for verification without knowing the # form of the input, be it deterministic, realizations or probabilistic. # A warning is normally raised when attempting to extract a percentile from # deterministic data as this is not possible; the spot-extraction of the # entire cube is returned. When preparing data for verification we know # that we will produce a large number of these warnings when passing in # deterministic data. This option to suppress warnings is provided to # reduce the amount of unneeded logging information that is written out. output_group.add_argument( "--suppress_warnings", default=False, action="store_true", help="Suppress warning output. This option should only be used if " "it is known that warnings will be generated but they are not " "required.") args = parser.parse_args(args=argv) neighbour_cube = load_cube(args.neighbour_filepath) diagnostic_cube = load_cube(args.diagnostic_filepath) neighbour_selection_method = NeighbourSelection( land_constraint=args.land_constraint, minimum_dz=args.minimum_dz).neighbour_finding_method_name() plugin = SpotExtraction( neighbour_selection_method=neighbour_selection_method) result = plugin.process(neighbour_cube, diagnostic_cube) # If a probability or percentile diagnostic cube is provided, extract # the given percentile if available. This is done after the spot-extraction # to minimise processing time; usually there are far fewer spot sites than # grid points. if args.extract_percentiles: try: perc_coordinate = find_percentile_coordinate(result) except CoordinateNotFoundError: if 'probability_of_' in result.name(): result = GeneratePercentilesFromProbabilities( ecc_bounds_warning=args.ecc_bounds_warning).process( result, percentiles=args.extract_percentiles) result = iris.util.squeeze(result) elif result.coords('realization', dim_coords=True): fast_percentile_method = ( False if np.ma.isMaskedArray(result.data) else True) result = PercentileConverter( 'realization', percentiles=args.extract_percentiles, fast_percentile_method=fast_percentile_method).process( result) else: msg = ('Diagnostic cube is not a known probabilistic type. ' 'The {} percentile could not be extracted. Extracting ' 'data from the cube including any leading ' 'dimensions.'.format( args.extract_percentiles)) if not args.suppress_warnings: warnings.warn(msg) else: constraint = ['{}={}'.format(perc_coordinate.name(), args.extract_percentiles)] perc_result = extract_subcube(result, constraint) if perc_result is not None: result = perc_result else: msg = ('The percentile diagnostic cube does not contain the ' 'requested percentile value. Requested {}, available ' '{}'.format(args.extract_percentiles, perc_coordinate.points)) raise ValueError(msg) # Check whether a lapse rate cube has been provided and we are dealing with # temperature data and the lapse-rate option is enabled. 
if (args.temperature_lapse_rate_filepath and args.apply_lapse_rate_correction): if not result.name() == "air_temperature": msg = ("A lapse rate cube was provided, but the diagnostic being " "processed is not air temperature and cannot be adjusted.") raise ValueError(msg) lapse_rate_cube = load_cube(args.temperature_lapse_rate_filepath) if not lapse_rate_cube.name() == 'air_temperature_lapse_rate': msg = ("A cube has been provided as a lapse rate cube but does " "not have the expected name air_temperature_lapse_rate: " "{}".format(lapse_rate_cube.name())) raise ValueError(msg) try: lapse_rate_height_coord = lapse_rate_cube.coord("height") except (ValueError, CoordinateNotFoundError): msg = ("Lapse rate cube does not contain a single valued height " "coordinate. This is required to ensure it is applied to " "equivalent temperature data.") raise ValueError(msg) # Check the height of the temperature data matches that used to # calculate the lapse rates. If so, adjust temperatures using the lapse # rate values. if diagnostic_cube.coord("height") == lapse_rate_height_coord: plugin = SpotLapseRateAdjust( neighbour_selection_method=neighbour_selection_method) result = plugin.process(result, neighbour_cube, lapse_rate_cube) else: msg = ("A lapse rate cube was provided, but the height of " "the temperature data does not match that of the data used " "to calculate the lapse rates. As such the temperatures " "were not adjusted with the lapse rates.") if not args.suppress_warnings: warnings.warn(msg) elif (args.apply_lapse_rate_correction and not args.temperature_lapse_rate_filepath): msg = ("A lapse rate cube was not provided, but the option to " "apply the lapse rate correction was enabled. No lapse rate " "correction could be applied.") if not args.suppress_warnings: warnings.warn(msg) # Modify final metadata as described by provided JSON file. if args.metadata_json: with open(args.metadata_json, 'r') as input_file: metadata_dict = json.load(input_file) result = amend_metadata(result, **metadata_dict) # Remove the internal model_grid_hash attribute if present. result.attributes.pop('model_grid_hash', None) # Save the spot data cube. save_netcdf(result, args.output_filepath)
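
# A sketch of the lapse-rate adjustment concept used above: the extracted
# grid-point temperature is shifted by lapse_rate * (site altitude -
# grid-point altitude). SpotLapseRateAdjust works on cubes and neighbour
# metadata; this shows only the underlying arithmetic.
def _lapse_rate_adjust_sketch(temperature_k, lapse_rate_k_per_m,
                              site_altitude_m, grid_altitude_m):
    return temperature_k + lapse_rate_k_per_m * (
        site_altitude_m - grid_altitude_m)
# e.g. a site 100 m below its grid point with a -0.0065 K/m lapse rate
# comes out 0.65 K warmer:
# _lapse_rate_adjust_sketch(283.15, -0.0065, 200., 300.) -> 283.8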
def main(argv=None): """Load in arguments and start spotdata extraction process.""" parser = ArgParser( description="Extract diagnostic data from gridded fields for spot data" " sites. It is possible to apply a temperature lapse rate adjustment" " to temperature data that helps to account for differences between" " the spot sites real altitude and that of the grid point from which" " the temperature data is extracted.") # Input and output files required. parser.add_argument("neighbour_filepath", metavar="NEIGHBOUR_FILEPATH", help="Path to a NetCDF file of spot-data neighbours. " "This file also contains the spot site information.") parser.add_argument("diagnostic_filepath", metavar="DIAGNOSTIC_FILEPATH", help="Path to a NetCDF file containing the diagnostic " "data to be extracted.") parser.add_argument("temperature_lapse_rate_filepath", metavar="LAPSE_RATE_FILEPATH", nargs='?', help="(Optional) Filepath to a NetCDF file containing" " temperature lapse rates. If this cube is provided," " and a screen temperature cube is being processed," " the lapse rates will be used to adjust the" " temperatures to better represent each spot's" " site-altitude.") parser.add_argument("output_filepath", metavar="OUTPUT_FILEPATH", help="The output path for the resulting NetCDF") parser.add_argument( "--apply_lapse_rate_correction", default=False, action="store_true", help="If the option is set and a lapse rate cube has been " "provided, extracted screen temperatures will be adjusted to " "better match the altitude of the spot site for which they have " "been extracted.") method_group = parser.add_argument_group( title="Neighbour finding method", description="If none of these options are set, the nearest grid point " "to a spot site will be used without any other constraints.") method_group.add_argument( "--land_constraint", default=False, action='store_true', help="If set the neighbour cube will be interrogated for grid point" " neighbours that were identified using a land constraint. This means" " that the grid points should be land points except for sites where" " none were found within the search radius when the neighbour cube was" " created. May be used with minimum_dz.") method_group.add_argument( "--minimum_dz", default=False, action='store_true', help="If set the neighbour cube will be interrogated for grid point" " neighbours that were identified using a minimum height difference" " constraint. These are grid points that were found to be the closest" " in altitude to the spot site within the search radius defined when" " the neighbour cube was created. May be used with land_constraint.") percentile_group = parser.add_argument_group( title="Extract percentiles", description="Extract particular percentiles from probabilistic, " "percentile, or realization inputs. If deterministic input is " "provided a warning is raised and all leading dimensions are included " "in the returned spot-data cube.") percentile_group.add_argument( "--extract_percentiles", default=None, nargs='+', type=float, help="If set to a percentile value or a list of percentile values, " "data corresponding to those percentiles will be returned. For " "example setting '--extract_percentiles 25 50 75' will result in the " "25th, 50th, and 75th percentiles being returned from a cube of " "probabilities, percentiles, or realizations. 
Note that for " "percentile inputs, the desired percentile(s) must exist in the input " "cube.") parser.add_argument( "--ecc_bounds_warning", default=False, action="store_true", help="If True, where calculated percentiles are outside the ECC " "bounds range, raise a warning rather than an exception.") meta_group = parser.add_argument_group("Metadata") meta_group.add_argument( "--metadata_json", metavar="METADATA_JSON", default=None, help="If provided, this JSON file can be used to modify the metadata " "of the returned netCDF file. Defaults to None.") output_group = parser.add_argument_group("Suppress Verbose output") # This CLI may be used to prepare data for verification without knowing the # form of the input, be it deterministic, realizations or probabilistic. # A warning is normally raised when attempting to extract a percentile from # deterministic data as this is not possible; the spot-extraction of the # entire cube is returned. When preparing data for verification we know # that we will produce a large number of these warnings when passing in # deterministic data. This option to suppress warnings is provided to # reduce the amount of unneeded logging information that is written out. output_group.add_argument( "--suppress_warnings", default=False, action="store_true", help="Suppress warning output. This option should only be used if " "it is known that warnings will be generated but they are not " "required.") args = parser.parse_args(args=argv) # Load Cube and JSON. neighbour_cube = load_cube(args.neighbour_filepath) diagnostic_cube = load_cube(args.diagnostic_filepath) lapse_rate_cube = load_cube(args.temperature_lapse_rate_filepath, allow_none=True) metadata_dict = load_json_or_none(args.metadata_json) # Process Cube result = process(neighbour_cube, diagnostic_cube, lapse_rate_cube, args.apply_lapse_rate_correction, args.land_constraint, args.minimum_dz, args.extract_percentiles, args.ecc_bounds_warning, metadata_dict, args.suppress_warnings) # Save Cube save_netcdf(result, args.output_filepath)
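
# A minimal illustration of extracting requested percentiles from a cube
# with a percentile coordinate, analogous to what the CLI does after spot
# extraction. The cube built here is hypothetical; real data comes from the
# diagnostic file.
def _extract_percentile_sketch():
    import numpy as np
    import iris
    percentile = iris.coords.DimCoord(
        [25., 50., 75.], long_name='percentile', units='%')
    cube = iris.cube.Cube(
        np.array([[1., 2.], [3., 4.], [5., 6.]], dtype=np.float32),
        long_name='air_temperature', units='K',
        dim_coords_and_dims=[(percentile, 0)])
    median = cube.extract(iris.Constraint(percentile=50.))
    return median.data  # -> array([3., 4.], dtype=float32)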
def main(argv=None): """Convert from probabilities to ensemble realizations via a CLI.""" cli_specific_arguments = [(['--no_of_realizations'], { 'metavar': 'NUMBER_OF_REALIZATIONS', 'default': None, 'type': int, 'help': ("Optional definition of the number of ensemble realizations to " "be generated. These are generated through an intermediate " "percentile representation. These percentiles will be " "distributed regularly with the aim of dividing into blocks of " "equal probability. If the reordering option is specified and " "the number of realizations is not given then the number of " "realizations is taken from the number of realizations in the " "raw forecast NetCDF file.") })] cli_definition = { 'central_arguments': ('input_file', 'output_file'), 'specific_arguments': cli_specific_arguments, 'description': ('Convert a dataset containing ' 'probabilities into one containing ' 'ensemble realizations.') } parser = ArgParser(**cli_definition) # add mutually exculsive options rebadge and reorder. # If reordering add option for raw ensemble - raise error if # raw ens missing. group = parser.add_mutually_exclusive_group(required=True) group.add_argument('--reordering', default=False, action='store_true', help='The option used to create ensemble realizations ' 'from percentiles by reordering the input ' 'percentiles based on the order of the ' 'raw ensemble forecast.') group.add_argument('--rebadging', default=False, action='store_true', help='The option used to create ensemble realizations ' 'from percentiles by rebadging the input ' 'percentiles.') # If reordering, we need a raw ensemble forecast. reordering = parser.add_argument_group( 'Reordering options', 'Options for reordering the input percentiles ' 'using the raw ensemble forecast as required to create ensemble ' 'realizations.') reordering.add_argument('--raw_forecast_filepath', metavar='RAW_FORECAST_FILE', help='A path to an raw forecast NetCDF file to be ' 'processed. This option is compulsory, if the ' 'reordering option is selected.') reordering.add_argument( '--random_seed', default=None, help='Option to specify a value for the random seed for testing ' 'purposes, otherwise, the default random seed behaviour is ' 'utilised. The random seed is used in the generation of the ' 'random numbers used for splitting tied values ' 'within the raw ensemble, so that the values from the input ' 'percentiles can be ordered to match the raw ensemble.') reordering.add_argument( '--ecc_bounds_warning', default=False, action='store_true', help='If True, where percentiles (calculated as an intermediate ' 'output before realizations) exceed the ECC bounds range, raise ' 'a warning rather than an exception.') args = parser.parse_args(args=argv) # CLI argument checking: # Can only do one of reordering or rebadging: if options are passed which # correspond to the opposite method, raise an exception. # Note: Shouldn't need to check that both/none are set, since they are # defined as mandatory, but mutually exclusive, options. 
if args.rebadging: if ((args.raw_forecast_filepath is not None) or (args.random_seed is not None)): parser.wrong_args_error('raw_forecast_filepath, random_seed', 'rebadging') # Process the data cube = load_cube(args.input_filepath) if args.reordering: if args.raw_forecast_filepath is None: message = ("You must supply a raw forecast filepath if using the " "reordering option.") raise ValueError(message) else: raw_forecast = load_cube(args.raw_forecast_filepath) try: raw_forecast.coord("realization") except CoordinateNotFoundError: message = ("The netCDF file from the raw_forecast_filepath " "must have a realization coordinate.") raise ValueError(message) no_of_realizations = args.no_of_realizations # If no_of_realizations is not given, take the number from the raw # ensemble cube. if args.no_of_realizations is None: no_of_realizations = len(raw_forecast.coord("realization").points) cube = GeneratePercentilesFromProbabilities( ecc_bounds_warning=args.ecc_bounds_warning).process( cube, no_of_percentiles=no_of_realizations) cube = EnsembleReordering().process(cube, raw_forecast, random_ordering=False, random_seed=args.random_seed) elif args.rebadging: cube = GeneratePercentilesFromProbabilities( ecc_bounds_warning=args.ecc_bounds_warning).process( cube, no_of_percentiles=args.no_of_realizations) cube = RebadgePercentilesAsRealizations().process(cube) save_netcdf(cube, args.output_filepath)
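# The help text above states that the intermediate percentiles are
# "distributed regularly with the aim of dividing into blocks of equal
# probability". Below is a minimal sketch of one such scheme; the helper
# name is illustrative and the GeneratePercentilesFromProbabilities plugin
# may compute its levels differently.
def equal_probability_percentiles(no_of_percentiles):
    """Return central percentile levels that split [0, 100] into blocks of
    equal probability, e.g. 3 -> [16.67, 50.0, 83.33]."""
    interval = 100.0 / no_of_percentiles
    return [interval * (i + 0.5) for i in range(no_of_percentiles)]
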
def main(argv=None): """Extrapolate data forward in time.""" parser = ArgParser( description="Extrapolate input data to required lead times.") parser.add_argument("input_filepath", metavar="INPUT_FILEPATH", type=str, help="Path to input NetCDF file.") group = parser.add_mutually_exclusive_group() group.add_argument("--output_dir", metavar="OUTPUT_DIR", type=str, default="", help="Directory to write output files.") group.add_argument("--output_filepaths", nargs="+", type=str, help="List of full paths to output nowcast files, in " "order of increasing lead time.") optflw = parser.add_argument_group('Advect using files containing the x ' ' and y components of the velocity') optflw.add_argument("--eastward_advection_filepath", type=str, help="Path" " to input file containing Eastward advection " "velocities.") optflw.add_argument("--northward_advection_filepath", type=str, help="Path" " to input file containing Northward advection " "velocities.") speed = parser.add_argument_group('Advect using files containing speed and' ' direction') speed.add_argument("--advection_speed_filepath", type=str, help="Path" " to input file containing advection speeds," " usually wind speeds, on multiple pressure levels.") speed.add_argument("--advection_direction_filepath", type=str, help="Path to input file containing the directions from" " which advection speeds are coming (180 degrees from" " the direction in which the speed is directed). The" " directions should be on the same grid as the input" " speeds, including the same vertical levels.") speed.add_argument("--pressure_level", type=int, default=75000, help="The" " pressure level in Pa to extract from the multi-level" " advection_speed and advection_direction files. The" " velocities at this level are used for advection.") parser.add_argument("--orographic_enhancement_filepaths", nargs="+", type=str, default=None, help="List or wildcarded " "file specification to the input orographic " "enhancement files. Orographic enhancement files are " "compulsory for precipitation fields.") parser.add_argument("--json_file", metavar="JSON_FILE", default=None, help="Filename for the json file containing " "required changes to the metadata. Information " "describing the intended contents of the json file " "is available in " "improver.utilities.cube_metadata.amend_metadata." "Every output cube will have the metadata_dict " "applied. Defaults to None.", type=str) parser.add_argument("--max_lead_time", type=int, default=360, help="Maximum lead time required (mins).") parser.add_argument("--lead_time_interval", type=int, default=15, help="Interval between required lead times (mins).") accumulation_args = parser.add_argument_group( 'Calculate accumulations from advected fields') accumulation_args.add_argument( "--accumulation_fidelity", type=int, default=0, help="If set, this CLI will additionally return accumulations" " calculated from the advected fields. This fidelity specifies the" " time interval in minutes between advected fields that is used to" " calculate these accumulations. This interval must be a factor of" " the lead_time_interval.") accumulation_args.add_argument( "--accumulation_period", type=int, default=15, help="The period over which the accumulation is calculated (mins). " "Only full accumulation periods will be computed. 
At lead times " "that are shorter than the accumulation period, no accumulation " "output will be produced.") accumulation_args.add_argument( "--accumulation_units", type=str, default='m', help="Desired units in which the accumulations should be expressed," "e.g. mm") # Load Cubes args = parser.parse_args(args=argv) metadata_dict = load_json_or_none(args.json_file) upath, vpath = (args.eastward_advection_filepath, args.northward_advection_filepath) spath, dpath = (args.advection_speed_filepath, args.advection_direction_filepath) # load files and initialise advection plugin input_cube = load_cube(args.input_filepath) orographic_enhancement_cube = load_cube( args.orographic_enhancement_filepaths, allow_none=True) speed_cube = direction_cube = ucube = vcube = None if (upath and vpath) and not (spath or dpath): ucube = load_cube(upath) vcube = load_cube(vpath) elif (spath and dpath) and not (upath or vpath): level_constraint = Constraint(pressure=args.pressure_level) try: speed_cube = load_cube(spath, constraints=level_constraint) direction_cube = load_cube(dpath, constraints=level_constraint) except ValueError as err: raise ValueError( '{} Unable to extract specified pressure level from given ' 'speed and direction files.'.format(err)) else: raise ValueError('Cannot mix advection component velocities with speed' ' and direction') # Process Cubes accumulation_cubes, forecast_to_return = process( input_cube, ucube, vcube, speed_cube, direction_cube, orographic_enhancement_cube, metadata_dict, args.max_lead_time, args.lead_time_interval, args.accumulation_fidelity, args.accumulation_period, args.accumulation_units) # Save Cube if args.output_filepaths and \ len(args.output_filepaths) != len(forecast_to_return): raise ValueError("Require exactly one output file name for each " "forecast lead time") for i, cube in enumerate(forecast_to_return): # save to a suitably-named output file if args.output_filepaths: file_name = args.output_filepaths[i] else: file_name = os.path.join(args.output_dir, generate_file_name(cube)) save_netcdf(cube, file_name) if args.accumulation_fidelity > 0: # return accumulation cubes for i, cube in enumerate(accumulation_cubes): file_name = os.path.join(args.output_dir, generate_file_name(cube)) save_netcdf(cube, file_name)
def main(argv=None): """Load in arguments and get going.""" description = ( "Determine grid point coordinates within the provided cubes that " "neighbour spot data sites defined within the provided JSON " "file. If no options are set the returned netCDF file will contain the" " nearest neighbour found for each site. Other constrained neighbour " "finding methods can be set with options below.") options = ("\n\nThese methods are:\n\n" " 1. nearest neighbour\n" " 2. nearest land point neighbour\n" " 3. nearest neighbour with minimum height difference\n" " 4. nearest land point neighbour with minimum height " "difference") parser = ArgParser( description=('\n'.join(wrap(description, width=79)) + options), formatter_class=RawDescriptionHelpFormatter) parser.add_argument("site_list_filepath", metavar="SITE_LIST_FILEPATH", help="Path to a JSON file that contains the spot sites" " for which neighbouring grid points are to be found.") parser.add_argument("orography_filepath", metavar="OROGRAPHY_FILEPATH", help="Path to a NetCDF file of model orography for the" " model grid on which neighbours are being found.") parser.add_argument("landmask_filepath", metavar="LANDMASK_FILEPATH", help="Path to a NetCDF file of model land mask for the" " model grid on which neighbours are being found.") parser.add_argument("output_filepath", metavar="OUTPUT_FILEPATH", help="The output path for the resulting NetCDF") parser.add_argument( "--all_methods", default=False, action='store_true', help="If set this will return a cube containing the nearest grid point" " neighbours to spot sites as defined by each possible combination of" " constraints.") group = parser.add_argument_group('Apply constraints to neighbour choice') group.add_argument( "--land_constraint", default=False, action='store_true', help="If set this will return a cube containing the nearest grid point" " neighbours to spot sites that are also land points. May be used with" " the minimum_dz option.") group.add_argument( "--minimum_dz", default=False, action='store_true', help="If set this will return a cube containing the nearest grid point" " neighbour to each spot site that is found, within a given search" " radius, to minimise the height difference between the two. May be" " used with the land_constraint option.") group.add_argument( "--search_radius", metavar="SEARCH_RADIUS", type=float, help="The radius in metres about a spot site within which to search" " for a grid point neighbour that is land or which has a smaller " " height difference than the nearest. The default value is 10000m " "(10km).") group.add_argument( "--node_limit", metavar="NODE_LIMIT", type=int, help="When searching within the defined search_radius for suitable " "neighbours, a KDTree is constructed. This node_limit prevents the " "tree from becoming too large for large search radii. A default of 36" " is set, which is to say the nearest 36 grid points will be " "considered. If the search_radius is likely to contain more than 36 " "points, this value should be increased to ensure all points are " "considered.") s_group = parser.add_argument_group('Site list options') s_group.add_argument( "--site_coordinate_system", metavar="SITE_COORDINATE_SYSTEM", help="The coordinate system in which the site coordinates are provided" " within the site list. This must be provided as the name of a cartopy" " coordinate system. The default is a PlateCarree system, with site" " coordinates given by latitude/longitude pairs. 
This can be a" " complete definition, including parameters required to modify a" " default system, e.g. Miller(central_longitude=90). If a globe is" " required this can be specified as e.g." " Globe(semimajor_axis=100, semiminor_axis=100).") s_group.add_argument( "--site_coordinate_options", metavar="SITE_COORDINATE_OPTIONS", help="JSON formatted string of options passed to the cartopy" " coordinate system given in site_coordinate_system. \"globe\"" " is handled as a special case for options to construct a cartopy" " Globe object.") s_group.add_argument( "--site_x_coordinate", metavar="SITE_X_COORDINATE", help="The x coordinate key within the JSON file. The plugin default is" " 'longitude', but can be changed using this option if required.") s_group.add_argument( "--site_y_coordinate", metavar="SITE_Y_COORDINATE", help="The y coordinate key within the JSON file. The plugin default is" " 'latitude', but can be changed using this option if required.") args = parser.parse_args(args=argv) # Load Cubes and JSON. site_list = load_json_or_none(args.site_list_filepath) orography = load_cube(args.orography_filepath) landmask = load_cube(args.landmask_filepath) # Process Cube result = process(orography, landmask, site_list, args.all_methods, args.land_constraint, args.minimum_dz, args.search_radius, args.node_limit, args.site_coordinate_system, args.site_coordinate_options, args.site_x_coordinate, args.site_y_coordinate) # Save Cube save_netcdf(result, args.output_filepath)
def main(argv=None): """Load in arguments and get going.""" description = ( "Determine grid point coordinates within the provided cubes that " "neighbour spot data sites defined within the provided JSON " "file. If no options are set the returned netCDF file will contain the" " nearest neighbour found for each site. Other constrained neighbour " "finding methods can be set with options below.") options = ("\n\nThese methods are:\n\n 1. nearest neighbour\n" " 2. nearest land point neighbour\n" " 3. nearest neighbour with minimum height difference\n" " 4. nearest land point neighbour with minimum height " "difference") parser = ArgParser(description=('\n'.join(wrap(description, width=79)) + options), formatter_class=RawDescriptionHelpFormatter) parser.add_argument("site_list_filepath", metavar="SITE_LIST_FILEPATH", help="Path to a JSON file that contains the spot sites" " for which neighbouring grid points are to be found.") parser.add_argument("orography_filepath", metavar="OROGRAPHY_FILEPATH", help="Path to a NetCDF file of model orography for the" " model grid on which neighbours are being found.") parser.add_argument("landmask_filepath", metavar="LANDMASK_FILEPATH", help="Path to a NetCDF file of model land mask for the" " model grid on which neighbours are being found.") parser.add_argument("output_filepath", metavar="OUTPUT_FILEPATH", help="The output path for the resulting NetCDF") parser.add_argument( "--all_methods", default=False, action='store_true', help="If set this will return a cube containing the nearest grid point" " neighbours to spot sites as defined by each possible combination of" " constraints.") group = parser.add_argument_group('Apply constraints to neighbour choice') group.add_argument( "--land_constraint", default=False, action='store_true', help="If set this will return a cube containing the nearest grid point" " neighbours to spot sites that are also land points. May be used with" " the minimum_dz option.") group.add_argument( "--minimum_dz", default=False, action='store_true', help="If set this will return a cube containing the nearest grid point" " neighbour to each spot site that is found, within a given search" " radius, to minimise the height difference between the two. May be" " used with the land_constraint option.") group.add_argument( "--search_radius", metavar="SEARCH_RADIUS", type=float, help="The radius in metres about a spot site within which to search" " for a grid point neighbour that is land or which has a smaller " " height difference than the nearest. The default value is 10000m " "(10km).") group.add_argument( "--node_limit", metavar="NODE_LIMIT", type=int, help="When searching within the defined search_radius for suitable " "neighbours, a KDTree is constructed. This node_limit prevents the " "tree from becoming too large for large search radii. A default of 36" " is set, which is to say the nearest 36 grid points will be " "considered. If the search_radius is likely to contain more than 36 " "points, this value should be increased to ensure all points are " "considered.") s_group = parser.add_argument_group('Site list options') s_group.add_argument( "--site_coordinate_system", metavar="SITE_COORDINATE_SYSTEM", help="The coordinate system in which the site coordinates are provided" " within the site list. This must be provided as the name of a cartopy" " coordinate system. The default is a PlateCarree system, with site" " coordinates given by latitude/longitude pairs. 
This can be a" " complete definition, including parameters required to modify a" " default system, e.g. Miller(central_longitude=90). If a globe is" " required this can be specified as e.g." " Globe(semimajor_axis=100, semiminor_axis=100).") s_group.add_argument( "--site_x_coordinate", metavar="SITE_X_COORDINATE", help="The x coordinate key within the JSON file. The plugin default is" " 'longitude', but can be changed using this option if required.") s_group.add_argument( "--site_y_coordinate", metavar="SITE_Y_COORDINATE", help="The y coordinate key within the JSON file. The plugin default is" " 'latitude', but can be changed using this option if required.") meta_group = parser.add_argument_group("Metadata") meta_group.add_argument( "--metadata_json", metavar="METADATA_JSON", default=None, help="If provided, this JSON file can be used to modify the metadata " "of the returned netCDF file. Defaults to None.") args = parser.parse_args(args=argv) # Open input files with open(args.site_list_filepath, 'r') as site_file: sitelist = json.load(site_file) orography = load_cube(args.orography_filepath) landmask = load_cube(args.landmask_filepath) fargs = (sitelist, orography, landmask) # Filter kwargs for those expected by plugin and which are set. # This preserves the plugin defaults for unset options. kwarg_list = [ 'land_constraint', 'minimum_dz', 'search_radius', 'site_coordinate_system', 'site_x_coordinate', 'node_limit', 'site_y_coordinate' ] kwargs = { k: v for (k, v) in vars(args).items() if k in kwarg_list and v is not None } # Deal with coordinate systems for sites other than PlateCarree. if 'site_coordinate_system' in kwargs.keys(): scrs = kwargs['site_coordinate_system'] kwargs['site_coordinate_system'] = safe_eval(scrs, ccrs, PROJECTION_LIST) # Check valid options have been selected. if args.all_methods is True and (kwargs['land_constraint'] is True or kwargs['minimum_dz'] is True): raise ValueError( 'Cannot use all_methods option with other constraints.') # Call plugin to generate neighbour cubes if args.all_methods: methods = [] methods.append({ **kwargs, 'land_constraint': False, 'minimum_dz': False }) methods.append({ **kwargs, 'land_constraint': True, 'minimum_dz': False }) methods.append({ **kwargs, 'land_constraint': False, 'minimum_dz': True }) methods.append({**kwargs, 'land_constraint': True, 'minimum_dz': True}) all_methods = iris.cube.CubeList([]) for method in methods: all_methods.append(NeighbourSelection(**method).process(*fargs)) squeezed_cubes = iris.cube.CubeList([]) for index, cube in enumerate(all_methods): cube.coord('neighbour_selection_method').points = index squeezed_cubes.append(iris.util.squeeze(cube)) result = merge_cubes(squeezed_cubes) else: result = NeighbourSelection(**kwargs).process(*fargs) result = enforce_coordinate_ordering( result, ['spot_index', 'neighbour_selection_method', 'grid_attributes']) # Modify final metadata as described by provided JSON file. if args.metadata_json: with open(args.metadata_json, 'r') as input_file: metadata_dict = json.load(input_file) result = amend_metadata(result, **metadata_dict) # Save the neighbour cube save_netcdf(result, args.output_filepath)