def _make_partially_collapsed_coord(self, coord, grid, guessed_axis):
    """
    Build the DimCoord that represents a coordinate after it has been partially collapsed
    (aggregated into bins).

    The points of the new coordinate come from ``aggregation_grid_array`` using the grid's start, end and delta.
    When only a single point results, its bounds are set to the grid limits; otherwise the bounds are filled in by
    the new coordinate's ``guess_bounds()``.

    :type coord: data_io.Coord.Coord
    :param coord: Coordinate to partially collapse
    :type grid: aggregation.aggregation_grid.AggregationGrid
    :param grid: grid on which this coordinate will aggregate
    :type guessed_axis: str
    :param guessed_axis: String identifier of the axis to which this coordinate belongs (e.g. 'T', 'X')
    :return: DimCoord
    """
    on_time_axis = grid.is_time or guessed_axis == 'T'

    if on_time_axis:
        # Time limits are turned into date/times and then expressed in the units of the coordinate.
        start_dt = parse_datetime.convert_datetime_components_to_datetime(grid.start, True)
        lower = Subset._convert_datetime_to_coord_unit(coord, start_dt)
        end_dt = parse_datetime.convert_datetime_components_to_datetime(grid.end, False)
        upper = Subset._convert_datetime_to_coord_unit(coord, end_dt)
        step = grid.delta
    else:
        # Anything else is treated as a plain numeric (non-time) axis.
        lower, upper = Subset._fix_non_circular_limits(float(grid.start), float(grid.end))
        step = float(grid.delta)

    bin_points = aggregation_grid_array(lower, upper, step, grid.is_time, coord)
    collapsed = DimCoord(
        bin_points,
        var_name=coord.name(),
        standard_name=coord.standard_name,
        units=coord.units,
    )

    if len(collapsed.points) != 1:
        collapsed.guess_bounds()
    else:
        # A lone point is given the full grid extent as its bounds.
        collapsed.bounds = [[lower, upper]]
    return collapsed
def test_GIVEN_single_variable_WHEN_subset_THEN_DataWriter_called_correctly(self):
    # The stub subsetter adds one to every value, so the data handed to the writer can be told apart
    # from the data returned by the reader.
    def _add_one(data, constraint):
        data.data += 1
        return data

    variable = 'var_name'
    filename = 'filename'
    output_file = 'output.hdf'
    xmin, xmax = -10, 10
    ymin, ymax = 40, 60
    limits = {
        'x': SubsetLimits(xmin, xmax, False),
        'y': SubsetLimits(ymin, ymax, False),
    }

    stub_subsetter = Subsetter()
    stub_subsetter.subset = _add_one
    stub_reader = DataReader()
    stub_reader.read_data_list = MagicMock(return_value=make_regular_2d_ungridded_data())
    stub_writer = DataWriter()
    stub_writer.write_data = MagicMock()

    Subset(
        limits,
        output_file,
        subsetter=stub_subsetter,
        data_reader=stub_reader,
        data_writer=stub_writer,
    ).subset(variable, filename, product=None)

    # Exactly one write, carrying the incremented data and the requested output file name.
    assert_that(stub_writer.write_data.call_count, is_(1))
    positional_args = stub_writer.write_data.call_args[0]
    written_data = positional_args[0]
    written_filename = positional_args[1]
    assert_that(written_data.data_flattened.tolist(),
                is_([2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16]))
    assert_that(written_filename, is_(output_file))
def test_GIVEN_single_variable_WHEN_subset_THEN_Subsetter_called_correctly(self):
    # Checks what the Subsetter receives: the data as read, plus an ungridded constraint built from the limits.
    variable = 'var_name'
    filename = 'filename'
    output_file = 'output.hdf'
    xmin, xmax = -10, 10
    ymin, ymax = 40, 60
    limits = {
        'x': SubsetLimits(xmin, xmax, False),
        'y': SubsetLimits(ymin, ymax, False),
    }

    stub_reader = DataReader()
    stub_reader.read_data_list = MagicMock(return_value=make_regular_2d_ungridded_data())
    stub_writer = DataWriter()
    stub_writer.write_data = Mock()
    spy_subsetter = Subsetter()
    # Hand back the first positional argument (the data) untouched.
    spy_subsetter.subset = MagicMock(side_effect=lambda *args: args[0])

    Subset(
        limits,
        output_file,
        subsetter=spy_subsetter,
        data_reader=stub_reader,
        data_writer=stub_writer,
    ).subset(variable, filename, product=None)

    assert_that(spy_subsetter.subset.call_count, is_(1))
    positional_args = spy_subsetter.subset.call_args[0]
    called_data = positional_args[0]
    called_constraint = positional_args[1]
    assert_that(called_data.data_flattened.tolist(),
                is_([1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15]))
    assert_that(called_constraint, instance_of(UngriddedSubsetConstraint))
    # Elements 1 and 2 of each stored limit are expected to be the (min, max) pair.
    assert_that(called_constraint._limits['lat'][1:3], is_((ymin, ymax)))
    assert_that(called_constraint._limits['lon'][1:3], is_((xmin, xmax)))
def test_GIVEN_multiple_variables_WHEN_subset_THEN_DataWriter_called_correctly(self):
    # Two gridded variables are read, incremented by the stub subsetter, and must both reach the writer
    # in a single write_data call together with the output file name.
    variables = ['var_name1', 'var_name2']
    filename = 'filename'
    xmin, xmax = 0, 5
    ymin, ymax = -5, 5
    limits = {'x': SubsetLimits(xmin, xmax, False),
              'y': SubsetLimits(ymin, ymax, False)}
    output_file = 'output.hdf'

    def _mock_subset(data, constraint):
        # Modify the data slightly so we can be sure it's passed in correctly
        for var in data:
            var.data += 1
        return data

    mock_subsetter = Subsetter()
    mock_subsetter.subset = _mock_subset
    mock_data_reader = DataReader()
    mock_data_reader.read_data_list = MagicMock(
        return_value=GriddedDataList([make_square_5x3_2d_cube(), make_square_5x3_2d_cube()]))
    mock_data_writer = DataWriter()
    mock_data_writer.write_data = MagicMock()

    subset = Subset(limits, output_file, subsetter=mock_subsetter,
                    data_reader=mock_data_reader, data_writer=mock_data_writer)
    subset.subset(variables, filename, product=None)

    assert_that(mock_data_writer.write_data.call_count, is_(1))
    written_data = mock_data_writer.write_data.call_args[0][0]
    written_filename = mock_data_writer.write_data.call_args[0][1]
    assert_that(written_data[0].data.tolist(),
                is_([[2, 3, 4], [5, 6, 7], [8, 9, 10], [11, 12, 13], [14, 15, 16]]))
    # The expected value must be wrapped in a matcher: with a non-matcher second argument assert_that only
    # checks that the first argument is truthy, so the two variables would never actually be compared.
    assert_that(written_data[1].data.tolist(), is_(written_data[0].data.tolist()))
    assert_that(written_filename, is_(output_file))
def test_GIVEN_multiple_variables_WHEN_subset_THEN_Subsetter_called_correctly(self):
    # Checks what the Subsetter receives for a gridded data list: both variables as read, plus a gridded
    # constraint built from the limits.
    variables = ['var_name1', 'var_name2']
    filename = 'filename'
    output_file = 'output.hdf'
    xmin, xmax = 0, 5
    ymin, ymax = -5, 5
    limits = {
        'x': SubsetLimits(xmin, xmax, False),
        'y': SubsetLimits(ymin, ymax, False),
    }

    stub_reader = DataReader()
    stub_reader.read_data_list = MagicMock(return_value=GriddedDataList(2 * [make_square_5x3_2d_cube()]))
    stub_writer = DataWriter()
    stub_writer.write_data = Mock()
    spy_subsetter = Subsetter()
    # Hand back the first positional argument (the data list) untouched.
    spy_subsetter.subset = MagicMock(side_effect=lambda *args: args[0])

    Subset(
        limits,
        output_file,
        subsetter=spy_subsetter,
        data_reader=stub_reader,
        data_writer=stub_writer,
    ).subset(variables, filename, product=None)

    assert_that(spy_subsetter.subset.call_count, is_(1))
    positional_args = spy_subsetter.subset.call_args[0]
    called_data = positional_args[0]
    called_constraint = positional_args[1]
    assert_that(called_data[0].data.tolist(),
                is_([[1, 2, 3], [4, 5, 6], [7, 8, 9], [10, 11, 12], [13, 14, 15]]))
    assert_that(called_data[1].data.tolist(), is_(called_data[0].data.tolist()))
    assert_that(called_constraint, instance_of(GriddedSubsetConstraint))
    # Elements 1 and 2 of each stored limit are expected to be the (min, max) pair.
    assert_that(called_constraint._limits['latitude'][1:3], is_((ymin, ymax)))
    assert_that(called_constraint._limits['longitude'][1:3], is_((xmin, xmax)))
def subset_cmd(main_arguments):
    """
    Entry point for the subset command.

    Reports an error if more than one data group was supplied, then subsets the variables of the first data
    group using the limits and output given on the command line.

    :param main_arguments: The command line arguments (minus the subset command)
    """
    from cis.subsetting.subset import Subset

    datagroups = main_arguments.datagroups
    if len(datagroups) > 1:
        __error_occurred("Subsetting can only be performed on one data group")

    group = datagroups[0]
    variables = group['variables']
    filenames = group['filenames']
    # A product of None is passed through unchanged, so no special-casing is needed.
    product = group["product"]

    Subset(main_arguments.limits, main_arguments.output).subset(variables, filenames, product)
def aggregation_grid_array(start, end, delta, is_time, coordinate):
    """
    Build the array of bin mid-points for an aggregation grid.

    For a non-time grid the points run from ``start + delta / 2`` in steps of ``delta``, stopping before
    ``end + delta / 2``. For a time grid, ``start`` and ``end`` are converted from the coordinate's units to
    date/times, the first point is moved to the middle of the first bin, and points are generated by repeatedly
    adding the delta's components while the current time is before the end; each point is converted back to the
    coordinate's units.

    :param start: lower limit of the grid
    :param end: upper limit of the grid
    :param delta: bin width; for a time grid an object with year, month, day, hour, minute and second attributes
    :param is_time: whether to build a time grid
    :param coordinate: coordinate used for the unit conversions (only used for a time grid)
    :return: numpy array of grid points
    """
    if not is_time:
        half_width = delta / 2
        return numpy.arange(start + half_width, end + half_width, delta)

    current = Subset._convert_coord_unit_to_datetime(coordinate, start)
    stop = Subset._convert_coord_unit_to_datetime(coordinate, end)

    # Shift the starting point to the middle of the first bin.
    if delta.year > 0:
        current = add_year_midpoint(current, delta.year)
    # We make an assumption here that half a month is always 15 days.
    if delta.month > 0:
        current = add_month_midpoint(current, delta.month)

    # The day-and-smaller part of the step never changes, so it is built once.
    sub_month_step = datetime.timedelta(days=delta.day, hours=delta.hour,
                                        minutes=delta.minute, seconds=delta.second)
    current += sub_month_step / 2

    points = []
    while current < stop:
        points.append(Subset._convert_datetime_to_coord_unit(coordinate, current))

        year = current.year + delta.year
        month = current.month + delta.month
        if month > 12:
            month, year = month_past_end_of_year(month, year)

        # TODO this is a slightly inelegant fix for the problem of something like 30th Jan +1 month
        # Need to work out what correct behaviour should be in this case.
        try:
            current = current.replace(year=year, month=month)
        except ValueError:
            current += datetime.timedelta(days=28)
        current += sub_month_step

    return numpy.array(points)
def test_GIVEN_multiple_variables_and_filenames_WHEN_subset_THEN_DataReader_called_correctly(self):
    # The DataReader must be asked exactly once, with the filenames as first positional argument and the
    # variables as second.
    variables = ['var_name1', 'var_name2']
    filenames = ['filename1', 'filename2']
    output_file = 'output.hdf'
    xmin, xmax = 0, 5
    ymin, ymax = -5, 5
    limits = {'x': SubsetLimits(xmin, xmax, False),
              'y': SubsetLimits(ymin, ymax, False)}

    mock_data_reader = DataReader()
    mock_data_reader.read_data_list = MagicMock(return_value=GriddedDataList(2 * [make_square_5x3_2d_cube()]))
    mock_data_writer = DataWriter()
    mock_data_writer.write_data = Mock()
    mock_subsetter = Subsetter()
    mock_subsetter.subset = lambda *args: args[0]  # Return the data array unmodified

    subset = Subset(limits, output_file, subsetter=mock_subsetter,
                    data_reader=mock_data_reader, data_writer=mock_data_writer)
    subset.subset(variables, filenames, product=None)

    assert_that(mock_data_reader.read_data_list.call_count, is_(1))
    # Expected values are wrapped in is_(): with a non-matcher second argument assert_that only checks that
    # the first argument is truthy, so the arguments would never actually be compared.
    assert_that(mock_data_reader.read_data_list.call_args[0][0], is_(filenames))
    assert_that(mock_data_reader.read_data_list.call_args[0][1], is_(variables))
def test_GIVEN_single_variable_WHEN_subset_THEN_DataReader_called_correctly(self):
    # The DataReader must be asked exactly once, with the filename as first positional argument and the
    # variable as second.
    variable = 'var_name'
    filename = 'filename'
    output_file = 'output.hdf'
    xmin, xmax = -10, 10
    ymin, ymax = 40, 60
    limits = {'x': SubsetLimits(xmin, xmax, False),
              'y': SubsetLimits(ymin, ymax, False)}

    mock_data_reader = DataReader()
    mock_data_reader.read_data_list = MagicMock(return_value=make_regular_2d_ungridded_data())
    mock_data_writer = DataWriter()
    mock_data_writer.write_data = Mock()
    mock_subsetter = Subsetter()
    mock_subsetter.subset = lambda *args: args[0]  # Return the data array unmodified

    subset = Subset(limits, output_file, subsetter=mock_subsetter,
                    data_reader=mock_data_reader, data_writer=mock_data_writer)
    subset.subset(variable, filename, product=None)

    assert_that(mock_data_reader.read_data_list.call_count, is_(1))
    # Expected values are wrapped in is_(): with a non-matcher second argument assert_that only checks that
    # the first argument is truthy, so the arguments would never actually be compared.
    assert_that(mock_data_reader.read_data_list.call_args[0][0], is_(filename))
    assert_that(mock_data_reader.read_data_list.call_args[0][1], is_(variable))