Esempio n. 1
0
 def _make_partially_collapsed_coord(self, coord, grid, guessed_axis):
     """
     Build the DimCoord that represents a coordinate after it has been partially collapsed (aggregated into bins).

     The points of the new coordinate are produced by aggregation_grid_array() from the grid limits and step.
     When only one point results, its bounds are set to the grid limits; otherwise the bounds are guessed.

     :type coord: data_io.Coord.Coord
     :param coord: Coordinate to partially collapse
     :type grid: aggregation.aggregation_grid.AggregationGrid
     :param grid: grid on which this coordinate will aggregate
     :type guessed_axis: str
     :param guessed_axis: String identifier of the axis to which this coordinate belongs (e.g. 'T', 'X')
     :return: DimCoord
     """
     treat_as_time = grid.is_time or guessed_axis == 'T'
     if not treat_as_time:
         # Any other axis: the limits and the step are handled as plain floats.
         grid_start, grid_end = Subset._fix_non_circular_limits(float(grid.start), float(grid.end))
         grid_delta = float(grid.delta)
     else:
         # Time axis: turn the limits into datetimes, then express them in the units of the coordinate.
         start_dt = parse_datetime.convert_datetime_components_to_datetime(grid.start, True)
         grid_start = Subset._convert_datetime_to_coord_unit(coord, start_dt)
         end_dt = parse_datetime.convert_datetime_components_to_datetime(grid.end, False)
         grid_end = Subset._convert_datetime_to_coord_unit(coord, end_dt)
         grid_delta = grid.delta

     # NOTE(review): the grid builder is driven by grid.is_time alone, not by the guessed axis - confirm intended.
     bin_points = aggregation_grid_array(grid_start, grid_end, grid_delta, grid.is_time, coord)
     new_coord = DimCoord(bin_points, var_name=coord.name(), standard_name=coord.standard_name,
                          units=coord.units)

     if len(new_coord.points) != 1:
         new_coord.guess_bounds()
     else:
         # A single bin spans the whole requested range.
         new_coord.bounds = [[grid_start, grid_end]]
     return new_coord
Esempio n. 2
0
    def test_GIVEN_single_variable_WHEN_subset_THEN_DataWriter_called_correctly(self):
        """The subsetter's output must reach DataWriter.write_data exactly once, together with the output filename."""
        output_file = 'output.hdf'
        limits = {'x': SubsetLimits(-10, 10, False),
                  'y': SubsetLimits(40, 60, False)}

        def _increment_data(data, constraint):
            # Shift every value by one so the written data can be told apart from the data that was read
            data.data += 1
            return data

        mock_subsetter = Subsetter()
        mock_subsetter.subset = _increment_data
        mock_data_reader = DataReader()
        mock_data_reader.read_data_list = MagicMock(return_value=make_regular_2d_ungridded_data())
        mock_data_writer = DataWriter()
        mock_data_writer.write_data = MagicMock()

        subset = Subset(limits, output_file, subsetter=mock_subsetter,
                        data_reader=mock_data_reader, data_writer=mock_data_writer)
        subset.subset('var_name', 'filename', product=None)

        write_mock = mock_data_writer.write_data
        assert_that(write_mock.call_count, is_(1))
        positional_args = write_mock.call_args[0]
        written_data = positional_args[0]
        written_filename = positional_args[1]
        # Values 2..16 inclusive: each value read has been incremented once
        assert_that(written_data.data_flattened.tolist(), is_(list(range(2, 17))))
        assert_that(written_filename, is_(output_file))
Esempio n. 3
0
    def test_GIVEN_single_variable_WHEN_subset_THEN_Subsetter_called_correctly(self):
        """The subsetter must be called once with the data that was read and an ungridded constraint for the limits."""
        lon_range = (-10, 10)
        lat_range = (40, 60)
        limits = {'x': SubsetLimits(lon_range[0], lon_range[1], False),
                  'y': SubsetLimits(lat_range[0], lat_range[1], False)}
        output_file = 'output.hdf'

        def _return_first_argument(*args):
            # Hand the data array back unmodified
            return args[0]

        mock_data_reader = DataReader()
        mock_data_reader.read_data_list = MagicMock(return_value=make_regular_2d_ungridded_data())
        mock_data_writer = DataWriter()
        mock_data_writer.write_data = Mock()
        mock_subsetter = Subsetter()
        mock_subsetter.subset = MagicMock(side_effect=_return_first_argument)

        subset = Subset(limits, output_file, subsetter=mock_subsetter,
                        data_reader=mock_data_reader, data_writer=mock_data_writer)
        subset.subset('var_name', 'filename', product=None)

        subset_mock = mock_subsetter.subset
        assert_that(subset_mock.call_count, is_(1))
        positional_args = subset_mock.call_args[0]
        called_data = positional_args[0]
        called_constraint = positional_args[1]
        # Values 1..15 inclusive: the data exactly as returned by the reader
        assert_that(called_data.data_flattened.tolist(), is_(list(range(1, 16))))
        assert_that(called_constraint, instance_of(UngriddedSubsetConstraint))
        assert_that(called_constraint._limits['lat'][1:3], is_(lat_range))
        assert_that(called_constraint._limits['lon'][1:3], is_(lon_range))
Esempio n. 4
0
    def test_GIVEN_multiple_variables_WHEN_subset_THEN_DataWriter_called_correctly(self):
        """
        The list of variables returned by the subsetter must be written exactly once, to the requested output file,
        and both variables must carry the modified data.
        """
        variables = ['var_name1', 'var_name2']
        filename = 'filename'
        xmin, xmax = 0, 5
        ymin, ymax = -5, 5
        limits = {'x': SubsetLimits(xmin, xmax, False),
                  'y': SubsetLimits(ymin, ymax, False)}
        output_file = 'output.hdf'

        def _mock_subset(data, constraint):
            # Modify the data slightly so we can be sure it's passed in correctly
            for var in data:
                var.data += 1
            return data

        mock_subsetter = Subsetter()
        mock_subsetter.subset = _mock_subset
        mock_data_reader = DataReader()
        # Two distinct cubes, so that the in-place increment above is applied to each of them exactly once
        mock_data_reader.read_data_list = MagicMock(return_value=GriddedDataList([make_square_5x3_2d_cube(),
                                                                                  make_square_5x3_2d_cube()]))
        mock_data_writer = DataWriter()
        mock_data_writer.write_data = MagicMock()

        subset = Subset(limits, output_file, subsetter=mock_subsetter,
                        data_reader=mock_data_reader, data_writer=mock_data_writer)
        subset.subset(variables, filename, product=None)

        assert_that(mock_data_writer.write_data.call_count, is_(1))
        written_data = mock_data_writer.write_data.call_args[0][0]
        written_filename = mock_data_writer.write_data.call_args[0][1]
        assert_that(written_data[0].data.tolist(), is_([[2, 3, 4], [5, 6, 7], [8, 9, 10], [11, 12, 13], [14, 15, 16]]))
        # The expected value must be wrapped in a matcher: without one, assert_that() only checks that its first
        # argument is truthy and treats the second argument as the failure message.
        assert_that(written_data[1].data.tolist(), is_(written_data[0].data.tolist()))
        assert_that(written_filename, is_(output_file))
Esempio n. 5
0
    def test_GIVEN_multiple_variables_WHEN_subset_THEN_Subsetter_called_correctly(self):
        """The subsetter must be called once with the list of data that was read and a gridded constraint."""
        lon_range = (0, 5)
        lat_range = (-5, 5)
        limits = {'x': SubsetLimits(lon_range[0], lon_range[1], False),
                  'y': SubsetLimits(lat_range[0], lat_range[1], False)}
        output_file = 'output.hdf'

        def _return_first_argument(*args):
            # Hand the data list back unmodified
            return args[0]

        mock_data_reader = DataReader()
        mock_data_reader.read_data_list = MagicMock(return_value=GriddedDataList(2 * [make_square_5x3_2d_cube()]))
        mock_data_writer = DataWriter()
        mock_data_writer.write_data = Mock()
        mock_subsetter = Subsetter()
        mock_subsetter.subset = MagicMock(side_effect=_return_first_argument)

        subset = Subset(limits, output_file, subsetter=mock_subsetter,
                        data_reader=mock_data_reader, data_writer=mock_data_writer)
        subset.subset(['var_name1', 'var_name2'], 'filename', product=None)

        subset_mock = mock_subsetter.subset
        assert_that(subset_mock.call_count, is_(1))
        positional_args = subset_mock.call_args[0]
        called_data = positional_args[0]
        called_constraint = positional_args[1]
        first_values = called_data[0].data.tolist()
        assert_that(first_values, is_([[1, 2, 3], [4, 5, 6], [7, 8, 9], [10, 11, 12], [13, 14, 15]]))
        assert_that(called_data[1].data.tolist(), is_(called_data[0].data.tolist()))
        assert_that(called_constraint, instance_of(GriddedSubsetConstraint))
        assert_that(called_constraint._limits['latitude'][1:3], is_(lat_range))
        assert_that(called_constraint._limits['longitude'][1:3], is_(lon_range))
Esempio n. 6
0
def subset_cmd(main_arguments):
    """
    Entry point for the subset command: subset the single data group given on the command line.

    An error is reported through __error_occurred() when more than one data group is supplied.

    :param main_arguments:    The command line arguments (minus the subset command)
    """
    from cis.subsetting.subset import Subset

    datagroups = main_arguments.datagroups
    if len(datagroups) > 1:
        __error_occurred("Subsetting can only be performed on one data group")
    input_group = datagroups[0]

    variables = input_group['variables']
    filenames = input_group['filenames']
    # May be None, which is passed straight through to Subset.subset()
    product = input_group["product"]

    subsetter = Subset(main_arguments.limits, main_arguments.output)
    subsetter.subset(variables, filenames, product)
Esempio n. 7
0
def aggregation_grid_array(start, end, delta, is_time, coordinate):
    """
    Build the array of bin mid-points of an aggregation grid.

    :param start: lower limit of the grid (for a time grid, a value in the units of the coordinate)
    :param end: upper limit of the grid (for a time grid, a value in the units of the coordinate)
    :param delta: width of a bin: a number for a non-time grid, otherwise an object exposing year, month, day,
                  hour, minute and second attributes
    :param is_time: True to build the grid with date/time arithmetic, False for plain numeric arithmetic
    :param coordinate: coordinate used to convert between datetimes and coordinate values (time grids only)
    :return: numpy array holding the mid-point of every bin
    """
    if not is_time:
        # Numeric grid: shift both limits by half a bin so that the points fall on the bin centres.
        half_bin = delta / 2
        return numpy.arange(start + half_bin, end + half_bin, delta)

    first_point = Subset._convert_coord_unit_to_datetime(coordinate, start)
    stop = Subset._convert_coord_unit_to_datetime(coordinate, end)

    # Some logic to find the mid point to start on
    if delta.year > 0:
        first_point = add_year_midpoint(first_point, delta.year)

    # We make an assumption here that half a month is always 15 days.
    if delta.month > 0:
        first_point = add_month_midpoint(first_point, delta.month)

    # The part of the step below one month; the year and month parts are applied separately in the loop.
    sub_month_step = datetime.timedelta(days=delta.day, hours=delta.hour, minutes=delta.minute,
                                        seconds=delta.second)
    first_point += sub_month_step / 2

    grid_points = []
    current = first_point
    while current < stop:
        grid_points.append(Subset._convert_datetime_to_coord_unit(coordinate, current))

        next_year = current.year + delta.year
        next_month = current.month + delta.month
        if next_month > 12:
            next_month, next_year = month_past_end_of_year(next_month, next_year)
        # TODO this is a slightly inelegant fix for the problem of something like 30th Jan +1 month
        # Need to work out what correct behaviour should be in this case.
        try:
            current = current.replace(year=next_year, month=next_month)
        except ValueError:
            current += datetime.timedelta(days=28)
        current += sub_month_step

    return numpy.array(grid_points)
Esempio n. 8
0
    def test_GIVEN_multiple_variables_and_filenames_WHEN_subset_THEN_DataReader_called_correctly(self):
        """
        DataReader.read_data_list must be called exactly once, with the filenames as its first positional argument
        and the variables as its second.
        """
        variables = ['var_name1', 'var_name2']
        filenames = ['filename1', 'filename2']
        output_file = 'output.hdf'
        xmin, xmax = 0, 5
        ymin, ymax = -5, 5
        limits = {'x': SubsetLimits(xmin, xmax, False),
                  'y': SubsetLimits(ymin, ymax, False)}

        mock_data_reader = DataReader()
        mock_data_reader.read_data_list = MagicMock(return_value=GriddedDataList(2 * [make_square_5x3_2d_cube()]))
        mock_data_writer = DataWriter()
        mock_data_writer.write_data = Mock()
        mock_subsetter = Subsetter()
        mock_subsetter.subset = lambda *args: args[0]  # Return the data array unmodified

        subset = Subset(limits, output_file, subsetter=mock_subsetter,
                        data_reader=mock_data_reader, data_writer=mock_data_writer)
        subset.subset(variables, filenames, product=None)
        assert_that(mock_data_reader.read_data_list.call_count, is_(1))
        # The expected values must be wrapped in a matcher: without one, assert_that() only checks that its first
        # argument is truthy and treats the second argument as the failure message.
        assert_that(mock_data_reader.read_data_list.call_args[0][0], is_(filenames))
        assert_that(mock_data_reader.read_data_list.call_args[0][1], is_(variables))
Esempio n. 9
0
    def test_GIVEN_single_variable_WHEN_subset_THEN_DataReader_called_correctly(self):
        """
        DataReader.read_data_list must be called exactly once, with the filename as its first positional argument
        and the variable as its second.
        """
        variable = 'var_name'
        filename = 'filename'
        output_file = 'output.hdf'
        xmin, xmax = -10, 10
        ymin, ymax = 40, 60
        limits = {'x': SubsetLimits(xmin, xmax, False),
                  'y': SubsetLimits(ymin, ymax, False)}

        mock_data_reader = DataReader()
        mock_data_reader.read_data_list = MagicMock(return_value=make_regular_2d_ungridded_data())
        mock_data_writer = DataWriter()
        mock_data_writer.write_data = Mock()
        mock_subsetter = Subsetter()
        mock_subsetter.subset = lambda *args: args[0]  # Return the data array unmodified

        subset = Subset(limits, output_file, subsetter=mock_subsetter,
                        data_reader=mock_data_reader, data_writer=mock_data_writer)
        subset.subset(variable, filename, product=None)
        assert_that(mock_data_reader.read_data_list.call_count, is_(1))
        # The expected values must be wrapped in a matcher: without one, assert_that() only checks that its first
        # argument is truthy and treats the second argument as the failure message.
        assert_that(mock_data_reader.read_data_list.call_args[0][0], is_(filename))
        assert_that(mock_data_reader.read_data_list.call_args[0][1], is_(variable))