    def test_complete_collapse_two_dims_using_moments_kernel(self):
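        """Fully collapse both horizontal dimensions with the 'moments' kernel; each input variable should
        produce mean, standard deviation and num-points outputs."""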
        self.kernel = aggregation_kernels['moments']
        data1 = make_from_cube(make_5x3_lon_lat_2d_cube_with_missing_data())
        data1.var_name = 'var1'
        data2 = make_from_cube(make_5x3_lon_lat_2d_cube_with_missing_data())
        data2.var_name = 'var2'
        data2.data += 10
        data = GriddedDataList([data1, data2])
        grid = {'x': AggregationGrid(float('Nan'), float('Nan'), float('Nan'), False),
                'y': AggregationGrid(float('Nan'), float('Nan'), float('Nan'), False)}

        agg = Aggregator(data, grid)
        output = agg.aggregate_gridded(self.kernel)

        expect_mean = numpy.array(7.75)
        expect_stddev = numpy.array(numpy.sqrt(244.25 / 11))
        expect_count = numpy.array(12)

        assert isinstance(output, GriddedDataList)
        assert len(output) == 6
        mean_1, mean_2, stddev_1, stddev_2, count_1, count_2 = output
        assert mean_1.var_name == 'var1'
        assert stddev_1.var_name == 'var1_std_dev'
        assert count_1.var_name == 'var1_num_points'
        assert mean_2.var_name == 'var2'
        assert stddev_2.var_name == 'var2_std_dev'
        assert count_2.var_name == 'var2_num_points'
        # Latitude area weighting means these aren't quite right so increase the rtol.
        assert numpy.allclose(mean_1.data, expect_mean, rtol=1e-3)
        assert numpy.allclose(mean_2.data, expect_mean + 10, rtol=1e-3)
        assert numpy.allclose(stddev_1.data, expect_stddev)
        assert numpy.allclose(stddev_2.data, expect_stddev)
        assert numpy.allclose(count_1.data, expect_count)
        assert numpy.allclose(count_2.data, expect_count)

    def test_partial_aggregation_over_more_than_one_dim_on_multidimensional_coord(self):
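        """Collapse the time and longitude dimensions of a GriddedDataList whose cubes carry a
        multidimensional surface_air_pressure coordinate; data and coordinate points should both collapse."""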
        from cis.data_io.gridded_data import GriddedDataList, make_from_cube

        data1 = make_from_cube(make_mock_cube(time_dim_length=7, hybrid_pr_len=5))
        data2 = make_from_cube(make_mock_cube(time_dim_length=7, hybrid_pr_len=5, data_offset=1))
        datalist = GriddedDataList([data1, data2])

        grid = {'t': AggregationGrid(float('Nan'), float('Nan'), float('Nan'), True),
                'x': AggregationGrid(float('Nan'), float('Nan'), float('Nan'), False)}

        agg = Aggregator(datalist, grid)
        cube_out = agg.aggregate_gridded(self.kernel)

        result_data = numpy.array([[51.0, 52.0, 53.0, 54.0, 55.0],
                                   [156.0, 157.0, 158.0, 159.0, 160.0],
                                   [261.0, 262.0, 263.0, 264.0, 265.0],
                                   [366.0, 367.0, 368.0, 369.0, 370.0],
                                   [471.0, 472.0, 473.0, 474.0, 475.0]], dtype=np.float64)

        multidim_coord_points = numpy.array([1000000., 3100000., 5200000., 7300000., 9400000.], dtype=np.float64)

        assert_arrays_almost_equal(cube_out[0].data, result_data)
        assert_arrays_almost_equal(cube_out[1].data, result_data+1)
        assert_arrays_almost_equal(cube_out[0].coord('surface_air_pressure').points, multidim_coord_points)
        assert_arrays_almost_equal(cube_out[1].coord('surface_air_pressure').points, multidim_coord_points)

    def test_complete_collapse_one_dim_using_moments_kernel(self):
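        """Fully collapse one dimension with the 'moments' kernel; mean, std-dev and num-points cubes should
        be returned for each variable, with derived var_names."""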
        self.kernel = aggregation_kernels['moments']
        data1 = make_from_cube(make_5x3_lon_lat_2d_cube_with_missing_data())
        data1.var_name = 'var1'
        data2 = make_from_cube(make_5x3_lon_lat_2d_cube_with_missing_data())
        data2.var_name = 'var2'
        data2.data += 10
        data = GriddedDataList([data1, data2])

        grid = {'x': AggregationGrid(float('Nan'), float('Nan'), float('Nan'), False)}

        agg = Aggregator(data, grid)
        output = agg.aggregate_gridded(self.kernel)

        expect_mean = numpy.array([[5.5, 8.75, 9]])
        expect_stddev = numpy.array([numpy.sqrt(15), numpy.sqrt(26.25), numpy.sqrt(30)])
        expect_count = numpy.array([[4, 4, 4]])

        assert isinstance(output, GriddedDataList)
        assert len(output) == 6
        mean_1, mean_2, stddev_1, stddev_2, count_1, count_2 = output
        assert mean_1.var_name == 'var1'
        assert stddev_1.var_name == 'var1_std_dev'
        assert count_1.var_name == 'var1_num_points'
        assert mean_2.var_name == 'var2'
        assert stddev_2.var_name == 'var2_std_dev'
        assert count_2.var_name == 'var2_num_points'
        assert_arrays_almost_equal(mean_1.data, expect_mean)
        assert_arrays_almost_equal(mean_2.data, expect_mean + 10)
        assert_arrays_almost_equal(stddev_1.data, expect_stddev)
        assert_arrays_almost_equal(stddev_2.data, expect_stddev)
        assert_arrays_almost_equal(count_1.data, expect_count)
        assert_arrays_almost_equal(count_2.data, expect_count)

    def test_GIVEN_single_variable_WHEN_aggregate_THEN_DataWriter_called_correctly(self):
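        """The aggregated output should be handed to the DataWriter exactly once, together with the requested
        output filename."""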
        variables = 'var_name'
        filenames = 'filename'
        output_file = 'output.hdf'
        kernel = 'mean'
        grid = None
        input_data = GriddedDataList([make_from_cube(make_square_5x3_2d_cube())])
        output_data = make_from_cube(make_square_5x3_2d_cube() + 1)

        mock_data_reader = DataReader()
        mock_data_reader.read_data_list = MagicMock(return_value=input_data)
        mock_data_writer = DataWriter()
        mock_data_writer.write_data = Mock()
        mock_aggregator = Aggregator(None, None)
        mock_aggregator.aggregate_gridded = MagicMock(return_value=output_data)  # Return the modified data array

        aggregate = Aggregate(grid, output_file, data_reader=mock_data_reader, data_writer=mock_data_writer)
        aggregate._create_aggregator = MagicMock(return_value=mock_aggregator)
        aggregate.aggregate(variables, filenames, None, kernel)

        assert_that(mock_data_writer.write_data.call_count, is_(1))
        written_data = mock_data_writer.write_data.call_args[0][0]
        written_filename = mock_data_writer.write_data.call_args[0][1]
        assert_that(written_data.data.tolist(), is_([[2, 3, 4], [5, 6, 7], [8, 9, 10], [11, 12, 13], [14, 15, 16]]))
        assert_that(written_filename, is_(output_file))

    def test_partial_aggregation_over_multidimensional_coord_along_middle_of_cube(self):
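        """Collapsing longitude, an interior dimension of the hybrid-pressure cube, should also collapse the
        multidimensional surface_air_pressure coordinate (JASCIS-126)."""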
        from cis.data_io.gridded_data import GriddedData
        # JASCIS-126
        self.cube = make_mock_cube(time_dim_length=7, hybrid_pr_len=5)
        grid = {'x': AggregationGrid(float('Nan'), float('Nan'), float('Nan'), False)}

        agg = Aggregator(GriddedData.make_from_cube(self.cube), grid)
        cube_out = agg.aggregate_gridded(self.kernel)

        result_data = numpy.array([[[36.0, 37.0, 38.0, 39.0, 40.0],
                                    [41.0, 42.0, 43.0, 44.0, 45.0],
                                    [46.0, 47.0, 48.0, 49.0, 50.0],
                                    [51.0, 52.0, 53.0, 54.0, 55.0],
                                    [56.0, 57.0, 58.0, 59.0, 60.0],
                                    [61.0, 62.0, 63.0, 64.0, 65.0],
                                    [66.0, 67.0, 68.0, 69.0, 70.0]],

                                   [[141.0, 142.0, 143.0, 144.0, 145.0],
                                    [146.0, 147.0, 148.0, 149.0, 150.0],
                                    [151.0, 152.0, 153.0, 154.0, 155.0],
                                    [156.0, 157.0, 158.0, 159.0, 160.0],
                                    [161.0, 162.0, 163.0, 164.0, 165.0],
                                    [166.0, 167.0, 168.0, 169.0, 170.0],
                                    [171.0, 172.0, 173.0, 174.0, 175.0]],

                                   [[246.0, 247.0, 248.0, 249.0, 250.0],
                                    [251.0, 252.0, 253.0, 254.0, 255.0],
                                    [256.0, 257.0, 258.0, 259.0, 260.0],
                                    [261.0, 262.0, 263.0, 264.0, 265.0],
                                    [266.0, 267.0, 268.0, 269.0, 270.0],
                                    [271.0, 272.0, 273.0, 274.0, 275.0],
                                    [276.0, 277.0, 278.0, 279.0, 280.0]],

                                   [[351.0, 352.0, 353.0, 354.0, 355.0],
                                    [356.0, 357.0, 358.0, 359.0, 360.0],
                                    [361.0, 362.0, 363.0, 364.0, 365.0],
                                    [366.0, 367.0, 368.0, 369.0, 370.0],
                                    [371.0, 372.0, 373.0, 374.0, 375.0],
                                    [376.0, 377.0, 378.0, 379.0, 380.0],
                                    [381.0, 382.0, 383.0, 384.0, 385.0]],

                                   [[456.0, 457.0, 458.0, 459.0, 460.0],
                                    [461.0, 462.0, 463.0, 464.0, 465.0],
                                    [466.0, 467.0, 468.0, 469.0, 470.0],
                                    [471.0, 472.0, 473.0, 474.0, 475.0],
                                    [476.0, 477.0, 478.0, 479.0, 480.0],
                                    [481.0, 482.0, 483.0, 484.0, 485.0],
                                    [486.0, 487.0, 488.0, 489.0, 490.0]]], dtype=np.float64)

        multidim_coord_points = numpy.array([[700000., 800000., 900000., 1000000., 1100000., 1200000., 1300000.],
                                             [2800000., 2900000., 3000000., 3100000., 3200000., 3300000., 3400000.],
                                             [4900000., 5000000., 5100000., 5200000., 5300000., 5400000., 5500000.],
                                             [7000000., 7100000., 7200000., 7300000., 7400000., 7500000., 7600000.],
                                             [9100000., 9200000., 9300000., 9400000., 9500000., 9600000., 9700000.]],
                                            dtype=np.float64)

        assert_arrays_almost_equal(cube_out[0].data, result_data)
        assert_arrays_almost_equal(cube_out[0].coord('surface_air_pressure').points, multidim_coord_points)

    def test_GIVEN_grid_contains_single_points_WHEN_collapse_THEN_stddev_undefined(self):
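        """With at most one valid value per collapsed column the standard deviation is undefined, so the
        std-dev output should be entirely masked."""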
        grid = {'y': AggregationGrid(-10, 10, float('Nan'), False)}
        cube = mock.make_mock_cube(2, 2)
        cube.data = numpy.ma.masked_invalid([[float('Nan'), 1], [float('Nan'), float('Nan')]])
        kernel = aggregation_kernels['moments']
        agg = Aggregator(cube, grid)
        result = agg.aggregate_gridded(kernel)

        assert_that(result[1].data.mask.all())

    def test_can_name_variables_by_variable_name(self):
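        """Aggregation dimensions can be specified by coordinate name ('lat', 'lon') as well as by axis
        letter ('x', 'y')."""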
        grid = {'lon': AggregationGrid(float('Nan'), float('Nan'), float('NaN'), False),
                'lat': AggregationGrid(float('Nan'), float('Nan'), float('NaN'), False)}

        agg = Aggregator(self.cube, grid)
        cube_out = agg.aggregate_gridded(self.kernel)

        result = numpy.array(8.0)

        assert numpy.array_equal(result, cube_out[0].data)

    def test_aggregating_to_same_grid_returns_original_data(self):
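        """Aggregating onto a grid identical to the cube's own grid should leave the data and coordinates
        unchanged, since partial collapse of gridded data is not supported (see JASCIS-148)."""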
        # Partial collapse of gridded data not supported (see JASCIS-148).
        grid = {'y': AggregationGrid(-12.5, 12.5, 5, False)}

        agg = Aggregator(self.cube, grid)
        cube_out = agg.aggregate_gridded(self.kernel)

        assert numpy.array_equal(self.cube.data, cube_out.data)
        assert numpy.array_equal(self.cube.coords('latitude')[0].points, cube_out.coords('latitude')[0].points)
        assert numpy.array_equal(self.cube.coords('longitude')[0].points, cube_out.coords('longitude')[0].points)

    def test_collapsing_everything_returns_a_single_value(self):
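        """Collapsing every dimension should reduce the cube to a single mean value."""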
        grid = {'x': AggregationGrid(float('Nan'), float('Nan'), float('NaN'), False),
                'y': AggregationGrid(float('Nan'), float('Nan'), float('NaN'), False)}

        agg = Aggregator(self.cube, grid)
        cube_out = agg.aggregate_gridded(self.kernel)

        result = numpy.array(8.0)

        assert numpy.array_equal(result, cube_out[0].data)

    def test_collapsing_coordinate_takes_start_end_but_ignores_them(self):
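        """Start and end values supplied for a fully collapsed coordinate are accepted but ignored."""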
        grid = {'x': AggregationGrid(0, 5, float('NaN'), False)}

        agg = Aggregator(self.cube, grid)
        cube_out = agg.aggregate_gridded(self.kernel)

        result = numpy.array([2, 5, 8, 11, 14])

        # There is a small deviation due to the weighting correction applied by Iris when completely collapsing
        assert_arrays_almost_equal(result, cube_out[0].data)
        assert numpy.array_equal(self.cube.coords('latitude')[0].points, cube_out.coords('latitude')[0].points)

    def test_aggregation_on_three_dimensional_grid_with_time(self):
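        """Collapsing latitude, longitude and time of a 3-D cube should give a single mean value."""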
        self.cube = make_mock_cube(time_dim_length=7)
        grid = {'t': AggregationGrid(float('Nan'), float('Nan'), float('Nan'), True),
                'x': AggregationGrid(float('Nan'), float('Nan'), float('NaN'), False),
                'y': AggregationGrid(float('Nan'), float('Nan'), float('NaN'), False)}

        agg = Aggregator(self.cube, grid)
        cube_out = agg.aggregate_gridded(self.kernel)

        result_data = numpy.array(53)
        assert_arrays_almost_equal(result_data, cube_out[0].data)

    def test_aggregating_using_std_dev_kernel_returns_sample_standard_deviation(self):
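        """The STD_DEV kernel should return the sample (N-1) standard deviation along the collapsed dimension."""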
        self.kernel = iris.analysis.STD_DEV
        grid = {'y': AggregationGrid(float('Nan'), float('Nan'), float('NaN'), False)}

        agg = Aggregator(self.cube, grid)
        cube_out = agg.aggregate_gridded(self.kernel)

        result = numpy.array([numpy.sqrt(22.5), numpy.sqrt(22.5), numpy.sqrt(22.5)])

        assert numpy.array_equal(result, cube_out[0].data)
        assert numpy.array_equal(self.cube.coords('longitude')[0].points, cube_out.coords('longitude')[0].points)

    def test_aggregating_using_min_kernel_returns_minimums(self):
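        """The MIN kernel should return the smallest value when all dimensions are collapsed."""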
        self.kernel = iris.analysis.MIN
        grid = {'x': AggregationGrid(float('Nan'), float('Nan'), float('NaN'), False),
                'y': AggregationGrid(float('Nan'), float('Nan'), float('NaN'), False)}

        agg = Aggregator(self.cube, grid)
        cube_out = agg.aggregate_gridded(self.kernel)

        result = numpy.array(1)

        assert numpy.array_equal(result, cube_out[0].data)

    def test_aggregation_over_multidimensional_coord(self):
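        """Collapsing every dimension of a hybrid-pressure cube, including the air_pressure coordinate,
        should give a single mean value."""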
        self.cube = make_mock_cube(time_dim_length=7, hybrid_pr_len=5)
        grid = {'t': AggregationGrid(float('Nan'), float('Nan'), float('Nan'), True),
                'x': AggregationGrid(float('Nan'), float('Nan'), float('NaN'), False),
                'y': AggregationGrid(float('Nan'), float('Nan'), float('NaN'), False),
                'air_pressure': AggregationGrid(float('Nan'), float('Nan'), float('NaN'), False)}

        agg = Aggregator(self.cube, grid)
        cube_out = agg.aggregate_gridded(self.kernel)

        result_data = numpy.array(263)
        assert_arrays_almost_equal(cube_out[0].data, result_data)

    def test_collapsing_everything_returns_a_single_value_with_missing_values(self):
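        """Fully collapsing a cube containing missing data should give approximately the mean of the
        unmasked values."""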
        self.cube = make_5x3_lon_lat_2d_cube_with_missing_data()

        grid = {'x': AggregationGrid(float('Nan'), float('Nan'), float('NaN'), False),
                'y': AggregationGrid(float('Nan'), float('Nan'), float('NaN'), False)}

        agg = Aggregator(self.cube, grid)
        cube_out = agg.aggregate_gridded(self.kernel)

        # result = numpy.array(8.1538461538461533)
        result = numpy.array(numpy.mean(self.cube.data))

        assert numpy.allclose(result, cube_out[0].data, rtol=1e-2)

    def test_GIVEN_gridded_data_WHEN_full_collapse_THEN_calculations_correct(self):
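        """A full collapse with the 'moments' kernel should return the mean, sample standard deviation and
        number of points for each remaining column."""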
        grid = {'y': AggregationGrid(-10, 10, float('Nan'), False)}
        cube = mock.make_mock_cube()
        kernel = aggregation_kernels['moments']
        agg = Aggregator(cube, grid)
        result = agg.aggregate_gridded(kernel)

        expected_means = numpy.array([7, 8, 9])
        expected_std_dev = numpy.array(3 * [numpy.sqrt(22.5)])
        expected_no = numpy.array([5, 5, 5])
        assert_that(len(result), is_(3))
        assert_that(numpy.allclose(result[0].data, expected_means))
        assert_that(numpy.allclose(result[1].data, expected_std_dev))
        assert_that(numpy.array_equal(result[2].data, expected_no))

    def test_aggregate_mean(self):
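        """Collapsing one dimension of a GriddedDataList should return a GriddedDataList with one collapsed
        cube per input variable."""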
        from cis.data_io.gridded_data import GriddedDataList, make_from_cube

        data1 = make_from_cube(make_mock_cube())
        data2 = make_from_cube(make_mock_cube(data_offset=1))
        datalist = GriddedDataList([data1, data2])
        grid = {'y': AggregationGrid(float('Nan'), float('Nan'), float('Nan'), False)}

        agg = Aggregator(datalist, grid)
        cube_out = agg.aggregate_gridded(self.kernel)

        result1 = numpy.array([7, 8, 9])
        result2 = result1 + 1

        assert isinstance(cube_out, GriddedDataList)

        # There is a small deviation due to the weighting correction applied by Iris when completely collapsing
        assert_arrays_almost_equal(result1, cube_out[0].data)
        assert_arrays_almost_equal(result2, cube_out[1].data)

    def test_partial_aggregation_over_multidimensional_coord_with_multi_kernel(self):
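        """The 'moments' multi-kernel applied over time should give mean, std-dev and count cubes and also
        collapse the multidimensional surface_air_pressure coordinate (JASCIS-126)."""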
        from cis.data_io.gridded_data import GriddedData
        # JASCIS-126
        from cis.aggregation.aggregation_kernels import MultiKernel, StddevKernel, CountKernel
        self.kernel = MultiKernel('moments', [iris.analysis.MEAN, StddevKernel(), CountKernel()])
        self.cube = make_mock_cube(time_dim_length=7, hybrid_pr_len=5)
        grid = {'t': AggregationGrid(float('Nan'), float('Nan'), float('Nan'), True)}

        agg = Aggregator(GriddedData.make_from_cube(self.cube), grid)
        cube_out = agg.aggregate_gridded(self.kernel)

        result_data = numpy.array([[[16.0, 17.0, 18.0, 19.0, 20.0],
                                    [51.0, 52.0, 53.0, 54.0, 55.0],
                                    [86.0, 87.0, 88.0, 89.0, 90.0]],

                                   [[121.0, 122.0, 123.0, 124.0, 125.0],
                                    [156.0, 157.0, 158.0, 159.0, 160.0],
                                    [191.0, 192.0, 193.0, 194.0, 195.0]],

                                   [[226.0, 227.0, 228.0, 229.0, 230.0],
                                    [261.0, 262.0, 263.0, 264.0, 265.0],
                                    [296.0, 297.0, 298.0, 299.0, 300.0]],

                                   [[331.0, 332.0, 333.0, 334.0, 335.0],
                                    [366.0, 367.0, 368.0, 369.0, 370.0],
                                    [401.0, 402.0, 403.0, 404.0, 405.0]],

                                   [[436.0, 437.0, 438.0, 439.0, 440.0],
                                    [471.0, 472.0, 473.0, 474.0, 475.0],
                                    [506.0, 507.0, 508.0, 509.0, 510.0]]], dtype=np.float64)

        multidim_coord_points = numpy.array([[300000., 1000000., 1700000.],
                                             [2400000., 3100000., 3800000.],
                                             [4500000., 5200000., 5900000.],
                                             [6600000., 7300000., 8000000.],
                                             [8700000., 9400000., 10100000.]], dtype=np.float64)

        assert_arrays_almost_equal(cube_out[0].data, result_data)
        assert_arrays_almost_equal(cube_out[1].data, np.ones(result_data.shape)*10.8012345)
        assert_arrays_almost_equal(cube_out[2].data, np.ones(result_data.shape) * 7)
        assert_arrays_almost_equal(cube_out[0].coord('surface_air_pressure').points, multidim_coord_points)

    def test_GIVEN_multiple_variables_and_filenames_WHEN_aggregate_THEN_Aggregate_called_correctly(self):
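        """The requested kernel should be passed through to Aggregator.aggregate_gridded exactly once, even
        when several variables and filenames are given."""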
        variables = ['var_name1', 'var_name2']
        filenames = ['filename1', 'filename2']
        output_file = 'output.hdf'
        kernel = 'mean'
        grid = 'grid'
        input_data = GriddedDataList(2 * [make_from_cube(make_square_5x3_2d_cube())])
        output_data = input_data

        mock_data_reader = DataReader()
        mock_data_reader.read_data_list = MagicMock(return_value=input_data)
        mock_data_writer = DataWriter()
        mock_data_writer.write_data = Mock()
        mock_aggregator = Aggregator(None, None)
        mock_aggregator.aggregate_gridded = MagicMock(return_value=output_data)  # Return the data array unmodified

        aggregate = Aggregate(grid, output_file, data_reader=mock_data_reader, data_writer=mock_data_writer)
        aggregate._create_aggregator = MagicMock(return_value=mock_aggregator)
        aggregate.aggregate(variables, filenames, None, kernel)

        assert_that(mock_aggregator.aggregate_gridded.call_count, is_(1))
        assert_that(mock_aggregator.aggregate_gridded.call_args[0][0], is_(kernel))

    def test_partial_aggregation_over_more_than_one_multidimensional_coord(self):
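        """Collapsing time and longitude should also collapse both multidimensional coordinates
        (surface_air_pressure and altitude)."""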
        from cis.data_io.gridded_data import GriddedData
        self.cube = make_mock_cube(time_dim_length=7, hybrid_pr_len=5, geopotential_height=True)
        grid = {'t': AggregationGrid(float('Nan'), float('Nan'), float('Nan'), True),
                'x': AggregationGrid(float('Nan'), float('Nan'), float('Nan'), False)}

        agg = Aggregator(GriddedData.make_from_cube(self.cube), grid)
        cube_out = agg.aggregate_gridded(self.kernel)

        result_data = numpy.array([[51.0, 52.0, 53.0, 54.0, 55.0],
                                   [156.0, 157.0, 158.0, 159.0, 160.0],
                                   [261.0, 262.0, 263.0, 264.0, 265.0],
                                   [366.0, 367.0, 368.0, 369.0, 370.0],
                                   [471.0, 472.0, 473.0, 474.0, 475.0]], dtype=np.float64)

        altitude_points = result_data + 9

        surface_air_pressure_points = numpy.array([1000000., 3100000., 5200000., 7300000., 9400000.], dtype=np.float64)

        assert_arrays_almost_equal(cube_out[0].data, result_data)
        assert_arrays_almost_equal(cube_out[0].coord('surface_air_pressure').points, surface_air_pressure_points)
        assert_arrays_almost_equal(cube_out[0].coord('altitude').points, altitude_points)

    def test_GIVEN_gridded_data_WHEN_full_collapse_THEN_metadata_correct(self):
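        """The moments outputs should carry appropriate metadata: the mean keeps the original names and units,
        the std-dev and num-points cubes get derived names, and the count has no units."""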
        grid = {'y': AggregationGrid(-10, 10, float('Nan'), False)}
        cube = mock.make_mock_cube()
        cube.standard_name = 'age_of_sea_ice'  # Use a CF compliant name
        cube.long_name = 'Age of sea ice'
        cube.var_name = 'age_ice'
        cube.units = 'years'
        kernel = aggregation_kernels['moments']
        agg = Aggregator(cube, grid)
        result = agg.aggregate_gridded(kernel)

        mean, stddev, num = result
        assert_that(mean.standard_name, is_('age_of_sea_ice'))
        assert_that(stddev.standard_name, is_(None))
        assert_that(num.standard_name, is_(None))
        assert_that(mean.long_name, is_('Age of sea ice'))
        assert_that(stddev.long_name, is_('Corrected sample standard deviation of Age of sea ice'))
        assert_that(num.long_name, is_('Number of points used to calculate the mean of Age of sea ice'))
        assert_that(mean.var_name, is_('age_ice'))
        assert_that(stddev.var_name, is_('age_ice_std_dev'))
        assert_that(num.var_name, is_('age_ice_num_points'))
        assert_that(mean.units, is_('years'))
        assert_that(stddev.units, is_('years'))
        assert_that(num.units, is_(None))