def test_complete_collapse_two_dims_using_moments_kernel(self):
    self.kernel = aggregation_kernels['moments']
    data1 = make_from_cube(make_5x3_lon_lat_2d_cube_with_missing_data())
    data1.var_name = 'var1'
    data2 = make_from_cube(make_5x3_lon_lat_2d_cube_with_missing_data())
    data2.var_name = 'var2'
    data2.data += 10
    data = GriddedDataList([data1, data2])
    output = data.collapsed(['x', 'y'], how=self.kernel)

    expect_mean = numpy.array(7.75)
    expect_stddev = numpy.array(numpy.sqrt(244.25 / 11))
    expect_count = numpy.array(12)

    assert isinstance(output, GriddedDataList)
    assert len(output) == 6
    mean_1, stddev_1, count_1, mean_2, stddev_2, count_2 = output
    assert mean_1.var_name == 'var1'
    assert stddev_1.var_name == 'var1_std_dev'
    assert count_1.var_name == 'var1_num_points'
    assert mean_2.var_name == 'var2'
    assert stddev_2.var_name == 'var2_std_dev'
    assert count_2.var_name == 'var2_num_points'
    # Latitude area weighting means these aren't quite right so increase the rtol.
    assert numpy.allclose(mean_1.data, expect_mean, 1e-3)
    assert numpy.allclose(mean_2.data, expect_mean + 10, 1e-3)
    assert numpy.allclose(stddev_1.data, expect_stddev)
    assert numpy.allclose(stddev_2.data, expect_stddev)
    assert numpy.allclose(count_1.data, expect_count)
    assert numpy.allclose(count_2.data, expect_count)
def test_extrapolation_of_pres_points_on_hybrid_pressure_coordinates_multi_var(self):
    cube_list = [make_from_cube(mock.make_mock_cube(time_dim_length=3, hybrid_pr_len=10))]
    cube_list.append(make_from_cube(mock.make_mock_cube(time_dim_length=3, hybrid_pr_len=10, data_offset=100)))
    sample_points = UngriddedData.from_points_array(
        # Point interpolated in the horizontal and then extrapolated past the top vertical layer (by one layer)
        [HyperPoint(lat=-4.0, lon=-4.0, pres=68400050.0, t=dt.datetime(1984, 8, 27))])
    col = GriddedUngriddedCollocator(extrapolate=True)
    new_data = col.collocate(sample_points, cube_list, None, 'lin')
    assert_almost_equal(new_data[0].data[0], 125.0, decimal=7)
    assert_almost_equal(new_data[1].data[0], 225.0, decimal=7)
def test_horizontal_constraint_for_same_3d_grids_returns_original_data(self):
    # Create sample and data cubes that include a time coordinate with the dimensions in reverse of normal order.
    sample_cube = gridded_data.make_from_cube(
        mock.make_mock_cube(lat_dim_length=5, lon_dim_length=3, time_dim_length=2,
                            dim_order=["time", "lon", "lat"]))
    data_cube = gridded_data.make_from_cube(
        mock.make_mock_cube(lat_dim_length=5, lon_dim_length=3, time_dim_length=2,
                            dim_order=["time", "lon", "lat"]))
    data_points = data_cube.get_non_masked_points()
    sample_points = sample_cube.get_all_points()
    coord_map = make_coord_map(sample_cube, data_cube)

    # Make separation constraint small enough to include only the corresponding point in the data cube.
    constraint = SepConstraintKdtree(h_sep=400)
    index = HaversineDistanceKDTreeIndex()
    index.index_data(sample_points, data_points, coord_map, leafsize=2)
    constraint.haversine_distance_kd_tree_index = index

    for idx, sample_point in enumerate(sample_points):
        out_points = constraint.constrain_points(sample_point, data_points)
        # Two times for each spatial position.
        assert len(out_points) == 2
        assert data_points[idx].val[0] in [p.val[0] for p in out_points]
def test_partial_aggregation_over_more_than_one_dim_on_multidimensional_coord(self):
    from cis.data_io.gridded_data import GriddedDataList, make_from_cube
    data1 = make_from_cube(make_mock_cube(time_dim_length=7, hybrid_pr_len=5))
    data2 = make_from_cube(make_mock_cube(time_dim_length=7, hybrid_pr_len=5, data_offset=1))
    datalist = GriddedDataList([data1, data2])
    grid = {'t': AggregationGrid(float('Nan'), float('Nan'), float('Nan'), True),
            'x': AggregationGrid(float('Nan'), float('Nan'), float('Nan'), False)}

    agg = Aggregator(datalist, grid)
    cube_out = agg.aggregate_gridded(self.kernel)

    result_data = numpy.array([[51.0, 52.0, 53.0, 54.0, 55.0],
                               [156.0, 157.0, 158.0, 159.0, 160.0],
                               [261.0, 262.0, 263.0, 264.0, 265.0],
                               [366.0, 367.0, 368.0, 369.0, 370.0],
                               [471.0, 472.0, 473.0, 474.0, 475.0]], dtype=float)

    multidim_coord_points = numpy.array([1000000., 3100000., 5200000., 7300000., 9400000.], dtype=float)

    assert_arrays_almost_equal(cube_out[0].data, result_data)
    assert_arrays_almost_equal(cube_out[1].data, result_data + 1)
    assert_arrays_almost_equal(cube_out[0].coord('surface_air_pressure').points, multidim_coord_points)
    assert_arrays_almost_equal(cube_out[1].coord('surface_air_pressure').points, multidim_coord_points)
def test_complete_collapse_one_dim_using_moments_kernel(self):
    self.kernel = aggregation_kernels['moments']
    data1 = make_from_cube(make_5x3_lon_lat_2d_cube_with_missing_data())
    data1.var_name = 'var1'
    data2 = make_from_cube(make_5x3_lon_lat_2d_cube_with_missing_data())
    data2.var_name = 'var2'
    data2.data += 10
    data = GriddedDataList([data1, data2])
    grid = {'x': AggregationGrid(float('Nan'), float('Nan'), float('Nan'), False)}

    agg = Aggregator(data, grid)
    output = agg.aggregate_gridded(self.kernel)

    expect_mean = numpy.array([[5.5, 8.75, 9]])
    expect_stddev = numpy.array([numpy.sqrt(15), numpy.sqrt(26.25), numpy.sqrt(30)])
    expect_count = numpy.array([[4, 4, 4]])

    assert isinstance(output, GriddedDataList)
    assert len(output) == 6
    mean_1, mean_2, stddev_1, stddev_2, count_1, count_2 = output
    assert mean_1.var_name == 'var1'
    assert stddev_1.var_name == 'var1_std_dev'
    assert count_1.var_name == 'var1_num_points'
    assert mean_2.var_name == 'var2'
    assert stddev_2.var_name == 'var2_std_dev'
    assert count_2.var_name == 'var2_num_points'
    assert_arrays_almost_equal(mean_1.data, expect_mean)
    assert_arrays_almost_equal(mean_2.data, expect_mean + 10)
    assert_arrays_almost_equal(stddev_1.data, expect_stddev)
    assert_arrays_almost_equal(stddev_2.data, expect_stddev)
    assert_arrays_almost_equal(count_1.data, expect_count)
    assert_arrays_almost_equal(count_2.data, expect_count)
def test_complete_collapse_two_dims_using_moments_kernel(self):
    self.kernel = aggregation_kernels['moments']
    data1 = make_from_cube(make_5x3_lon_lat_2d_cube_with_missing_data())
    data1.var_name = 'var1'
    data2 = make_from_cube(make_5x3_lon_lat_2d_cube_with_missing_data())
    data2.var_name = 'var2'
    data2.data += 10
    data = GriddedDataList([data1, data2])
    grid = {'x': AggregationGrid(float('Nan'), float('Nan'), float('Nan'), False),
            'y': AggregationGrid(float('Nan'), float('Nan'), float('Nan'), False)}

    agg = Aggregator(data, grid)
    output = agg.aggregate_gridded(self.kernel)

    expect_mean = numpy.array(7.75)
    expect_stddev = numpy.array(numpy.sqrt(244.25 / 11))
    expect_count = numpy.array(12)

    assert isinstance(output, GriddedDataList)
    assert len(output) == 6
    mean_1, mean_2, stddev_1, stddev_2, count_1, count_2 = output
    assert mean_1.var_name == 'var1'
    assert stddev_1.var_name == 'var1_std_dev'
    assert count_1.var_name == 'var1_num_points'
    assert mean_2.var_name == 'var2'
    assert stddev_2.var_name == 'var2_std_dev'
    assert count_2.var_name == 'var2_num_points'
    # Latitude area weighting means these aren't quite right so increase the rtol.
    assert numpy.allclose(mean_1.data, expect_mean, 1e-3)
    assert numpy.allclose(mean_2.data, expect_mean + 10, 1e-3)
    assert numpy.allclose(stddev_1.data, expect_stddev)
    assert numpy.allclose(stddev_2.data, expect_stddev)
    assert numpy.allclose(count_1.data, expect_count)
    assert numpy.allclose(count_2.data, expect_count)
def test_GIVEN_gridded_datagroups_WHEN_read_datagroups_THEN_data_returned_in_list(self):
    datagroup_1 = {'variables': ['var1', 'var2'],
                   'filenames': ['filename1.nc'],
                   'product': None}
    datagroup_2 = {'variables': ['var3'],
                   'filenames': ['filename2.nc'],
                   'product': 'cis'}
    var1 = make_from_cube(make_square_5x3_2d_cube())
    var2 = make_from_cube(make_square_5x3_2d_cube())
    var3 = make_from_cube(make_square_5x3_2d_cube())
    get_data_func = MagicMock(side_effect=[var1, var2, var3])
    get_var_func = MagicMock(side_effect=lambda f: {'filename1.nc': ['var1', 'var2'],
                                                    'filename2.nc': ['var3']}[f])
    reader = DataReader(get_data_func=get_data_func, get_variables_func=get_var_func)
    data = reader.read_datagroups([datagroup_1, datagroup_2])
    assert_that(len(data), is_(3))
    assert_that(data[0], is_(var1))
    assert_that(data[1], is_(var2))
    assert_that(data[2], is_(var3))
def test_complete_collapse_one_dim_using_moments_kernel(self):
    self.kernel = aggregation_kernels['moments']
    data1 = make_from_cube(make_5x3_lon_lat_2d_cube_with_missing_data())
    data1.var_name = 'var1'
    data2 = make_from_cube(make_5x3_lon_lat_2d_cube_with_missing_data())
    data2.var_name = 'var2'
    data2.data += 10
    data = GriddedDataList([data1, data2])
    output = data.collapsed(['x'], how=self.kernel)

    expect_mean = numpy.array([[5.5, 8.75, 9]])
    expect_stddev = numpy.array([numpy.sqrt(15), numpy.sqrt(26.25), numpy.sqrt(30)])
    expect_count = numpy.array([[4, 4, 4]])

    assert isinstance(output, GriddedDataList)
    assert len(output) == 6
    mean_1, stddev_1, count_1, mean_2, stddev_2, count_2 = output
    assert mean_1.var_name == 'var1'
    assert stddev_1.var_name == 'var1_std_dev'
    assert count_1.var_name == 'var1_num_points'
    assert mean_2.var_name == 'var2'
    assert stddev_2.var_name == 'var2_std_dev'
    assert count_2.var_name == 'var2_num_points'
    assert_arrays_almost_equal(mean_1.data, expect_mean)
    assert_arrays_almost_equal(mean_2.data, expect_mean + 10)
    assert_arrays_almost_equal(stddev_1.data, expect_stddev)
    assert_arrays_almost_equal(stddev_2.data, expect_stddev)
    assert_arrays_almost_equal(count_1.data, expect_count)
    assert_arrays_almost_equal(count_2.data, expect_count)
def test_collocation_of_pres_alt_points_on_hybrid_pressure_coordinates_multi_var(self):
    cube_list = [make_from_cube(mock.make_mock_cube(time_dim_length=3, hybrid_pr_len=10))]
    cube_list.append(make_from_cube(mock.make_mock_cube(time_dim_length=3, hybrid_pr_len=10, data_offset=100)))

    sample_points = UngriddedData.from_points_array(
        [HyperPoint(lat=0.0, lon=0.0, pres=111100040.5, alt=5000, t=dt.datetime(1984, 8, 28, 0, 0, 0)),
         HyperPoint(lat=0.0, lon=0.0, pres=113625040.5, alt=4000, t=dt.datetime(1984, 8, 28, 12, 0, 0)),
         HyperPoint(lat=5.0, lon=2.5, pres=177125044.5, alt=3000, t=dt.datetime(1984, 8, 28, 0, 0, 0)),
         HyperPoint(lat=-4.0, lon=-4.0, pres=166600039.0, alt=3500, t=dt.datetime(1984, 8, 27))])
    col = GriddedUngriddedCollocator()
    outlist = col.collocate(sample_points, cube_list, None, 'lin')

    # First data set:
    new_data = outlist[0]
    # Exactly on the lat, lon, time points, interpolated over pressure
    assert_almost_equal(new_data.data[0], 221.5, decimal=5)
    # Exactly on the lat, lon points, interpolated over time and pressure
    assert_almost_equal(new_data.data[1], 226.5, decimal=7)
    # Exactly on the lat, time points, interpolated over longitude and pressure
    assert_almost_equal(new_data.data[2], 330.5, decimal=7)
    # Outside of the pressure bounds - extrapolation off
    assert np.ma.is_masked(new_data.data[3])

    # Second data set:
    new_data = outlist[1]
    # Exactly on the lat, lon, time points, interpolated over pressure
    assert_almost_equal(new_data.data[0], 321.5, decimal=5)
    # Exactly on the lat, lon points, interpolated over time and pressure
    assert_almost_equal(new_data.data[1], 326.5, decimal=7)
    # Exactly on the lat, time points, interpolated over longitude and pressure
    assert_almost_equal(new_data.data[2], 430.5, decimal=7)
    # Outside of the pressure bounds - extrapolation off
    assert np.ma.is_masked(new_data.data[3])
def test_list_gridded_ungridded_box_moments(self):
    data1 = make_from_cube(mock.make_mock_cube())
    data1.name = lambda: 'Name1'
    data1.var_name = 'var_name1'
    data1._standard_name = 'y_wind'
    data2 = make_from_cube(mock.make_mock_cube(data_offset=3))
    data2.name = lambda: 'Name1'
    data2.var_name = 'var_name2'
    data2._standard_name = 'x_wind'
    data_list = GriddedDataList([data1, data2])
    sample = UngriddedData.from_points_array(
        [HyperPoint(lat=1.0, lon=1.0, alt=12.0, t=dt.datetime(1984, 8, 29, 8, 34)),
         HyperPoint(lat=3.0, lon=3.0, alt=7.0, t=dt.datetime(1984, 8, 29, 8, 34)),
         HyperPoint(lat=-1.0, lon=-1.0, alt=5.0, t=dt.datetime(1984, 8, 29, 8, 34))])
    constraint = SepConstraintKdtree('500km')
    kernel = moments()

    col = GeneralUngriddedCollocator()
    output = col.collocate(sample, data_list, constraint, kernel)

    expected_result = np.array([28.0 / 3, 10.0, 20.0 / 3])
    expected_stddev = np.array([1.52752523, 1.82574186, 1.52752523])
    expected_n = np.array([3, 4, 3])
    assert len(output) == 6
    assert isinstance(output, UngriddedDataList)
    assert np.allclose(output[0].data, expected_result)
    assert np.allclose(output[1].data, expected_stddev)
    assert np.allclose(output[2].data, expected_n)
    assert np.allclose(output[3].data, expected_result + 3)
    assert np.allclose(output[4].data, expected_stddev)
    assert np.allclose(output[5].data, expected_n)
def test_GIVEN_single_variable_WHEN_aggregate_THEN_DataWriter_called_correctly(self):
    variables = 'var_name'
    filenames = 'filename'
    output_file = 'output.hdf'
    kernel = 'mean'
    grid = None
    input_data = GriddedDataList([make_from_cube(make_square_5x3_2d_cube())])
    output_data = make_from_cube(make_square_5x3_2d_cube() + 1)
    mock_data_reader = DataReader()
    mock_data_reader.read_data_list = MagicMock(return_value=input_data)
    mock_data_writer = DataWriter()
    mock_data_writer.write_data = Mock()
    mock_aggregator = Aggregator(None, None)
    mock_aggregator.aggregate_gridded = MagicMock(return_value=output_data)  # Return the modified data array

    aggregate = Aggregate(grid, output_file, data_reader=mock_data_reader, data_writer=mock_data_writer)
    aggregate._create_aggregator = MagicMock(return_value=mock_aggregator)
    aggregate.aggregate(variables, filenames, None, kernel)

    assert_that(mock_data_writer.write_data.call_count, is_(1))
    written_data = mock_data_writer.write_data.call_args[0][0]
    written_filename = mock_data_writer.write_data.call_args[0][1]
    assert_that(written_data.data.tolist(), is_([[2, 3, 4], [5, 6, 7], [8, 9, 10], [11, 12, 13], [14, 15, 16]]))
    assert_that(written_filename, is_(output_file))
def test_partial_aggregation_over_more_than_one_dim_on_multidimensional_coord(self):
    from cis.data_io.gridded_data import GriddedDataList, make_from_cube
    data1 = make_from_cube(make_mock_cube(time_dim_length=7, hybrid_pr_len=5))
    data2 = make_from_cube(make_mock_cube(time_dim_length=7, hybrid_pr_len=5, data_offset=1))
    datalist = GriddedDataList([data1, data2])

    cube_out = datalist.collapsed(['t', 'x'], how=self.kernel)

    result_data = numpy.array([[51.0, 52.0, 53.0, 54.0, 55.0],
                               [156.0, 157.0, 158.0, 159.0, 160.0],
                               [261.0, 262.0, 263.0, 264.0, 265.0],
                               [366.0, 367.0, 368.0, 369.0, 370.0],
                               [471.0, 472.0, 473.0, 474.0, 475.0]], dtype=float)

    multidim_coord_points = numpy.array([1000000., 3100000., 5200000., 7300000., 9400000.], dtype=float)

    assert_arrays_almost_equal(cube_out[0].data, result_data)
    assert_arrays_almost_equal(cube_out[1].data, result_data + 1)
    assert_arrays_almost_equal(cube_out[0].coord('surface_air_pressure').points, multidim_coord_points)
    assert_arrays_almost_equal(cube_out[1].coord('surface_air_pressure').points, multidim_coord_points)
def setUp(self):
    """
    Create the dummy objects necessary for testing
    """
    self.ug = make_regular_2d_ungridded_data()
    self.ug_1 = make_regular_2d_ungridded_data()
    self.gd = make_from_cube(make_mock_cube())
    self.gd_large = make_from_cube(make_mock_cube(50, 30))
def _make_two_gridded(self):
    data1 = make_from_cube(mock.make_mock_cube())
    data2 = make_from_cube(mock.make_mock_cube(data_offset=10))
    data1.var_name = 'var1'
    data2.var_name = 'var2'
    data1.filenames = ['filename1']
    data2.filenames = ['filename2']
    self.data = GriddedDataList([data1, data2])
def test_GIVEN_not_enough_aliases_WHEN_read_datagroups_THEN_raises_ValueError(self):
    datagroup = {'variables': ['var1', 'var2'],
                 'filenames': ['filename1.nc'],
                 'product': None,
                 'aliases': ['alias1']}
    var1 = make_from_cube(make_square_5x3_2d_cube())
    var2 = make_from_cube(make_square_5x3_2d_cube())
    get_data_func = MagicMock(side_effect=[var1, var2])
    get_var_func = MagicMock(side_effect=['var1', 'var2'])
    reader = DataReader(get_data_func=get_data_func, get_variables_func=get_var_func)
    with self.assertRaises(ValueError):
        data = reader.read_datagroups([datagroup])
def test_gridded_gridded_bin_when_sample_has_dimension_data_doesnt(self):
    # JASCIS-204
    from cis.data_io.gridded_data import make_from_cube
    sample = make_from_cube(make_mock_cube(time_dim_length=7, dim_order=['lat', 'lon', 'time']))
    data = make_from_cube(make_mock_cube(lat_dim_length=11, lon_dim_length=13, time_dim_length=0,
                                         dim_order=['time', 'lon', 'lat']))

    col = GeneralGriddedCollocator()
    constraint = BinningCubeCellConstraint()
    kernel = mean()
    out_cube = col.collocate(points=sample, data=data, constraint=constraint, kernel=kernel)
    assert out_cube[0].shape == (5, 3)
def test_gridded_ungridded_lin(self):
    data = make_from_cube(mock.make_mock_cube())
    data.name = lambda: 'Name'
    data.var_name = 'var_name'
    data._standard_name = 'y_wind'
    sample = UngriddedData.from_points_array(
        [HyperPoint(lat=1.0, lon=1.0, alt=12.0, t=dt.datetime(1984, 8, 29, 8, 34)),
         HyperPoint(lat=3.0, lon=3.0, alt=7.0, t=dt.datetime(1984, 8, 29, 8, 34)),
         HyperPoint(lat=-1.0, lon=-1.0, alt=5.0, t=dt.datetime(1984, 8, 29, 8, 34))])
    constraint = None

    col = GriddedUngriddedCollocator()
    output = col.collocate(sample, data, constraint, 'lin')

    expected_result = np.array([8.8, 10.4, 7.2])
    assert len(output) == 1
    assert isinstance(output, UngriddedDataList)
    assert np.allclose(output[0].data, expected_result)
def test_already_collocated_in_col_gridded_to_ungridded_in_2d(self):
    cube = make_from_cube(mock.make_square_5x3_2d_cube())
    # This point already exists on the cube with value 8 - which shouldn't be a problem
    sample_points = UngriddedData.from_points_array([HyperPoint(0.0, 0.0)])
    col = GriddedUngriddedCollocator()
    new_data = col.collocate(sample_points, cube, None, 'nn')[0]
    eq_(new_data.data[0], 8.0)
def test_get_axis_gridded(self):
    from cis.plotting.plot import get_axis
    from cis.test.util.mock import make_mock_cube
    from cis.data_io.gridded_data import make_from_cube
    d = make_from_cube(make_mock_cube())
    assert get_axis(d, "x").name() == 'longitude'
    assert get_axis(d, "y").name() == 'latitude'
def test_wrapping_of_alt_points_on_hybrid_height_coordinates_on_0_360_grid(self):
    cube = make_from_cube(mock.make_mock_cube(time_dim_length=3, hybrid_ht_len=10,
                                              lon_dim_length=36, lon_range=(0., 350.)))

    # Shift the cube around so that the dim which isn't hybrid (time) is at the front. This breaks the fix we used
    # for air pressure...
    cube.transpose([2, 0, 1, 3])
    # Ensure the longitude coord is circular
    cube.coord(standard_name='longitude').circular = True

    sample_points = UngriddedData.from_points_array(
        [HyperPoint(lat=4.0, lon=355.0, alt=11438.0, t=dt.datetime(1984, 8, 28)),
         HyperPoint(lat=0.0, lon=2.0, alt=10082.0, t=dt.datetime(1984, 8, 28))])
    col = GriddedUngriddedCollocator(extrapolate=False)
    new_data = col.collocate(sample_points, cube, None, 'lin')[0]
    eq_(new_data.data[0], 3563.0)
    eq_(new_data.data[1], 2185.0)
def test_can_subset_2d_gridded_data_with_missing_data(self):
    """This test just shows that missing values do not interfere with subsetting -
    nothing special happens to the missing values."""
    data = make_from_cube(cis.test.util.mock.make_square_5x3_2d_cube_with_missing_data())
    subset = data.subset(longitude=[0.0, 5.0])
    assert (subset.data.tolist(fill_value=-999) ==
            [[2, 3], [-999, 6], [8, -999], [11, 12], [14, 15]])
def test_gridded_write_no_time_has_no_unlimited_dimension(self):
    data = make_from_cube(make_mock_cube())
    data.var_name = 'rain'
    data.save_data(tmp_file)
    self.d = Dataset(tmp_file)
    for d in self.d.dimensions.values():
        assert not d.isunlimited()
def test_can_access_point_in_gridded_hyper_point_view_(self):
    gd = gridded_data.make_from_cube(mock.make_5x3_lon_lat_2d_cube_with_missing_data())
    hpv = gd.get_all_points()
    p = hpv[3, 1]
    assert (p.val[0] == 11.0)
    assert (p.longitude == 5.0)
    assert (p.latitude == 0.0)
def constrain(self, data):
    """
    Subsets the supplied data using a combination of iris.cube.Cube.extract and
    iris.cube.Cube.intersection, depending on whether intersection is supported
    (whether the coordinate has a defined modulus).

    :param data: data to be subsetted
    :return: subsetted data, or None if all the data is excluded
    :rtype: cis.data_io.gridded_data.GriddedData
    """
    _shape = self._limits.pop('shape', None)

    extract_constraint, intersection_constraint = self._make_extract_and_intersection_constraints(data)
    if extract_constraint is not None:
        data = data.extract(extract_constraint)
        if data is None:
            # Don't do the intersection
            return None
    if intersection_constraint:
        try:
            data = data.intersection(**intersection_constraint)
        except IndexError:
            return None

    if _shape is not None:
        if data.ndim > 2:
            raise NotImplementedError("Unable to perform shape subset for multidimensional gridded datasets")
        mask = np.ones(data.shape, dtype=bool)
        mask[np.unravel_index(_get_gridded_subset_region_indices(data, _shape), data.shape)] = False
        if isinstance(data.data, np.ma.MaskedArray):
            data.data.mask &= mask
        else:
            data.data = np.ma.masked_array(data.data, mask)

    return gridded_data.make_from_cube(data)
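# A minimal, hypothetical usage sketch (not part of the original module): it shows how the
# constrain() method above is normally reached via the higher-level subset call used by
# the tests in this file. The mock helper and the subset keyword come from those tests;
# the limits are illustrative only.
def _example_gridded_subset_usage():
    from cis.data_io.gridded_data import make_from_cube
    from cis.test.util.mock import make_square_5x3_2d_cube

    data = make_from_cube(make_square_5x3_2d_cube())
    # The keyword limits are turned into the extract/intersection constraints that
    # constrain() applies; longitude has a defined modulus, so the intersection path
    # can be used for it.
    return data.subset(longitude=[0.0, 5.0])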
def test_gridded_write_units(self):
    data = make_from_cube(make_mock_cube())
    data.var_name = 'rain'
    data.units = 'ppm'
    data.save_data(tmp_file)

    self.d = Dataset(tmp_file)
    assert self.d.variables['rain'].units == 'ppm'
def test_gridded_list_write_no_time_has_no_unlimited_dimension(self):
    data = GriddedDataList([make_from_cube(make_mock_cube())])
    data[0].var_name = 'rain'
    data.save_data(tmp_file)
    self.d = Dataset(tmp_file)
    for d in self.d.dimensions.values():
        assert not d.isunlimited()
def test_gridded_list_write_time_as_unlimited_dimension(self):
    data = GriddedDataList([make_from_cube(make_mock_cube(time_dim_length=7))])
    data[0].var_name = 'rain'
    data.save_data(tmp_file)
    self.d = Dataset(tmp_file)
    assert self.d.dimensions['time'].isunlimited()
def test_partial_aggregation_over_multidimensional_coord(self):
    # JASCIS-126
    self.cube = make_from_cube(make_mock_cube(time_dim_length=7, hybrid_pr_len=5))
    cube_out = self.cube.collapsed(['t'], how=self.kernel)

    result_data = numpy.array([[[16.0, 17.0, 18.0, 19.0, 20.0],
                                [51.0, 52.0, 53.0, 54.0, 55.0],
                                [86.0, 87.0, 88.0, 89.0, 90.0]],
                               [[121.0, 122.0, 123.0, 124.0, 125.0],
                                [156.0, 157.0, 158.0, 159.0, 160.0],
                                [191.0, 192.0, 193.0, 194.0, 195.0]],
                               [[226.0, 227.0, 228.0, 229.0, 230.0],
                                [261.0, 262.0, 263.0, 264.0, 265.0],
                                [296.0, 297.0, 298.0, 299.0, 300.0]],
                               [[331.0, 332.0, 333.0, 334.0, 335.0],
                                [366.0, 367.0, 368.0, 369.0, 370.0],
                                [401.0, 402.0, 403.0, 404.0, 405.0]],
                               [[436.0, 437.0, 438.0, 439.0, 440.0],
                                [471.0, 472.0, 473.0, 474.0, 475.0],
                                [506.0, 507.0, 508.0, 509.0, 510.0]]], dtype=float)

    multidim_coord_points = numpy.array([[300000., 1000000., 1700000.],
                                         [2400000., 3100000., 3800000.],
                                         [4500000., 5200000., 5900000.],
                                         [6600000., 7300000., 8000000.],
                                         [8700000., 9400000., 10100000.]], dtype=float)

    assert_arrays_almost_equal(cube_out[0].data, result_data)
    assert_arrays_almost_equal(cube_out[0].coord('surface_air_pressure').points, multidim_coord_points)
def test_setting_new_hyperpoint_coord_can_raise_exception(self):
    gd = gridded_data.make_from_cube(mock.make_5x3_lon_lat_2d_cube_with_missing_data())
    hpv = gd.get_non_masked_points()
    hpv._verify_no_coord_change_on_setting = True
    p = hpv[6]
    p_new = p.modified(lon=123, val=99)
    hpv[6] = p_new
def test_collocation_of_alt_pres_points_on_hybrid_altitude_coordinates(self):
    cube = make_from_cube(mock.make_mock_cube(time_dim_length=3, hybrid_ht_len=10))

    sample_points = UngriddedData.from_points_array(
        [HyperPoint(lat=0.0, lon=0.0, alt=5550.0, pres=10000.0, t=dt.datetime(1984, 8, 28)),
         HyperPoint(lat=4.0, lon=4.0, alt=6000.0, pres=1000.0, t=dt.datetime(1984, 8, 28)),
         HyperPoint(lat=-4.0, lon=-4.0, alt=6500.0, pres=100.0, t=dt.datetime(1984, 8, 27))])

    col = GriddedUngriddedCollocator(fill_value=np.nan)
    new_data = col.collocate(sample_points, cube, None, 'lin')[0]
    assert_almost_equal(new_data.data[0], 222.4814815, decimal=7)
    assert_almost_equal(new_data.data[1], 321.0467626, decimal=7)
    # Test that points outside the cell are returned as masked, rather than extrapolated by default
    assert np.ma.is_masked(new_data.data[2])
def test_aggregate_mean(self):
    from cis.data_io.gridded_data import GriddedDataList, make_from_cube
    data1 = make_from_cube(make_mock_cube())
    data2 = make_from_cube(make_mock_cube(data_offset=1))
    datalist = GriddedDataList([data1, data2])
    cube_out = datalist.collapsed(['y'], how=self.kernel)

    result1 = numpy.array([7, 8, 9])
    result2 = result1 + 1

    assert isinstance(cube_out, GriddedDataList)
    # There is a small deviation due to the weighting correction applied by Iris when completely collapsing
    assert_arrays_almost_equal(result1, cube_out[0].data)
    assert_arrays_almost_equal(result2, cube_out[1].data)
def test_missing_data_for_missing_sample(self):
    data = make_from_cube(mock.make_mock_cube())
    data.name = lambda: 'Name'
    data.var_name = 'var_name'
    data._standard_name = 'y_wind'
    sample = UngriddedData.from_points_array(
        [HyperPoint(lat=1.0, lon=1.0, alt=12.0, t=dt.datetime(1984, 8, 29, 8, 34)),
         HyperPoint(lat=3.0, lon=3.0, alt=7.0, t=dt.datetime(1984, 8, 29, 8, 34)),
         HyperPoint(lat=-1.0, lon=-1.0, alt=5.0, t=dt.datetime(1984, 8, 29, 8, 34))])
    constraint = None

    sample_mask = [False, True, False]
    sample.data = np.ma.array([0, 0, 0], mask=sample_mask)

    col = GriddedUngriddedCollocator(missing_data_for_missing_sample=True)
    output = col.collocate(sample, data, constraint, 'nn')

    assert len(output) == 1
    assert isinstance(output, UngriddedDataList)
    assert np.array_equal(output[0].data.mask, sample_mask)
def _iris_interpolate(coord_names_and_sizes_for_output_grid, coord_names_and_sizes_for_sample_grid, data, kernel,
                      output_mask, points):
    """
    Collocates using iris.analysis.interpolate
    """
    coordinate_point_pairs = []
    for j in range(0, len(coord_names_and_sizes_for_sample_grid)):
        # For each coordinate make the list of tuple pairs Iris requires, for example
        # [('latitude', -90), ('longitude', 0)]
        coordinate_point_pairs.append((coord_names_and_sizes_for_sample_grid[j][0], points.dim_coords[j].points))

    # The result here will be a cube with the correct dimensions for the output, so interpolated over all points
    # in coord_names_and_sizes_for_output_grid.
    output_cube = make_from_cube(data.interpolate(coordinate_point_pairs, kernel.interpolater()))

    # Iris outputs interpolated cubes with the dimensions in the order of the data grid, not the sample grid,
    # so we need to rearrange the order of the dimensions.
    output_coord_lookup = {}
    for idx, coord in enumerate(output_cube.dim_coords):
        output_coord_lookup[coord.name()] = idx
    transpose_map = [output_coord_lookup[coord[0]] for coord in coord_names_and_sizes_for_output_grid]
    output_cube.transpose(transpose_map)

    if isinstance(output_cube, list):
        for idx, data in enumerate(output_cube):
            output_cube[idx].data = cis.utils.apply_mask_to_numpy_array(data.data, output_mask)
    else:
        output_cube.data = cis.utils.apply_mask_to_numpy_array(output_cube.data, output_mask)

    return output_cube
def test_GIVEN_grid_contains_single_points_WHEN_collapse_THEN_stddev_undefined(self):
    cube = make_from_cube(mock.make_mock_cube(2, 2))
    cube.data = numpy.ma.masked_invalid([[float('Nan'), 1], [float('Nan'), float('Nan')]])
    kernel = aggregation_kernels['moments']
    result = cube.collapsed(['y'], how=kernel)
    assert_that(result[1].data.mask.all())
def _iris_interpolate(coord_names_and_sizes_for_output_grid, coord_names_and_sizes_for_sample_grid, data, kernel,
                      output_mask, points, extrapolate):
    """
    Collocates using iris.analysis.interpolate
    """
    coordinate_point_pairs = []
    for j in range(0, len(coord_names_and_sizes_for_sample_grid)):
        # For each coordinate make the list of tuple pairs Iris requires, for example
        # [('latitude', -90), ('longitude', 0)]
        coordinate_point_pairs.append((coord_names_and_sizes_for_sample_grid[j][0], points.dim_coords[j].points))

    # The result here will be a cube with the correct dimensions for the output, so interpolated over all points
    # in coord_names_and_sizes_for_output_grid.
    output_cube = make_from_cube(data.interpolate(coordinate_point_pairs,
                                                  kernel.interpolater(extrapolation_mode=extrapolate)))

    # Iris outputs interpolated cubes with the dimensions in the order of the data grid, not the sample grid,
    # so we need to rearrange the order of the dimensions.
    output_coord_lookup = {}
    for idx, coord in enumerate(output_cube.dim_coords):
        output_coord_lookup[coord.name()] = idx
    transpose_map = [output_coord_lookup[coord[0]] for coord in coord_names_and_sizes_for_output_grid]
    output_cube.transpose(transpose_map)

    if isinstance(output_cube, list):
        for idx, data in enumerate(output_cube):
            output_cube[idx].data = cis.utils.apply_mask_to_numpy_array(data.data, output_mask)
    else:
        output_cube.data = cis.utils.apply_mask_to_numpy_array(output_cube.data, output_mask)

    return output_cube
def test_collocation_of_alt_points_on_hybrid_pressure_and_altitude_coordinates(self):
    """
    The kernel should use the auxiliary altitude dimension when altitude is present in the coordinates.
    """
    cube = make_from_cube(mock.make_mock_cube(time_dim_length=3, hybrid_pr_len=10, geopotential_height=True))
    sample_points = UngriddedData.from_points_array(
        # This point actually lies outside the lower bounds for altitude at this point in space
        [HyperPoint(lat=1.0, lon=1.0, alt=10, t=dt.datetime(1984, 8, 28, 8, 34)),
         # This point lies in the middle of the altitude bounds at this point
         HyperPoint(lat=4.0, lon=4.0, alt=354, t=dt.datetime(1984, 8, 28, 8, 34)),
         # This point lies outside the upper bounds for altitude at this point
         HyperPoint(lat=-4.0, lon=-4.0, alt=1000, t=dt.datetime(1984, 8, 27, 2, 18, 52))])
    col = GriddedUngriddedCollocator(extrapolate=True)
    new_data = col.collocate(sample_points, cube, None, 'nn')[0]
    eq_(new_data.data[0], float(cube[2, 1, 1, 0].data))
    eq_(new_data.data[1], float(cube[3, 2, 1, 4].data))
    eq_(new_data.data[2], float(cube[1, 0, 0, 9].data))
def test_GIVEN_single_variable_WHEN_subset_THEN_DataWriter_called_correctly(self):
    variable = 'var_name'
    filename = 'filename'
    xmin, xmax = 0, 5
    ymin, ymax = -5, 5
    limits = {'x': SubsetLimits(xmin, xmax, False),
              'y': SubsetLimits(ymin, ymax, False)}
    output_file = 'output.hdf'

    def _mock_subset(data, constraint):
        data.data += 1  # Modify the data slightly so we can be sure it's passed in correctly
        return data

    mock_subsetter = Subsetter()
    mock_subsetter.subset = _mock_subset
    mock_data_reader = DataReader()
    mock_data_reader.read_data_list = MagicMock(return_value=make_from_cube(make_square_5x3_2d_cube()))
    mock_data_writer = DataWriter()
    mock_data_writer.write_data = MagicMock()

    subset = Subset(limits, output_file, subsetter=mock_subsetter,
                    data_reader=mock_data_reader, data_writer=mock_data_writer)
    subset.subset(variable, filename, product=None)

    assert_that(mock_data_writer.write_data.call_count, is_(1))
    written_data = mock_data_writer.write_data.call_args[0][0]
    written_filename = mock_data_writer.write_data.call_args[0][1]
    assert_that(written_data.data.tolist(), is_([[2, 3, 4], [5, 6, 7], [8, 9, 10], [11, 12, 13], [14, 15, 16]]))
    assert_that(written_filename, is_(output_file))
def test_collocation_of_pres_points_on_hybrid_pressure_coordinates_and_altitude_coordinates(self):
    """
    When only a pressure coordinate is present it should be used for the collocation.
    """
    cube = make_from_cube(mock.make_mock_cube(time_dim_length=3, hybrid_pr_len=10))
    sample_points = UngriddedData.from_points_array(
        # This point actually lies outside the lower bounds for pressure at this point in space
        [HyperPoint(lat=1.0, lon=1.0, pres=1100000.0, t=dt.datetime(1984, 8, 28, 8, 34)),
         # This point lies in the middle of the pressure bounds at this point
         HyperPoint(lat=4.0, lon=4.0, pres=184600000.0, t=dt.datetime(1984, 8, 28, 8, 34)),
         # This point lies outside the upper bounds for pressure at this point
         HyperPoint(lat=-4.0, lon=-4.0, pres=63100049.0, t=dt.datetime(1984, 8, 27, 2, 18, 52))])
    col = GriddedUngriddedCollocator(extrapolate=True)
    new_data = col.collocate(sample_points, cube, None, 'nn')[0]
    eq_(new_data.data[0], float(cube[2, 1, 1, 0].data))
    eq_(new_data.data[1], float(cube[3, 2, 1, 4].data))
    eq_(new_data.data[2], float(cube[1, 0, 0, 9].data))
def test_GIVEN_single_variable_WHEN_subset_THEN_Subsetter_called_correctly(self):
    variable = 'var_name'
    filename = 'filename'
    xmin, xmax = 0, 5
    ymin, ymax = -5, 5
    limits = {'x': SubsetLimits(xmin, xmax, False),
              'y': SubsetLimits(ymin, ymax, False)}
    output_file = 'output.hdf'

    mock_data_reader = DataReader()
    mock_data_reader.read_data_list = MagicMock(return_value=make_from_cube(make_square_5x3_2d_cube()))
    mock_data_writer = DataWriter()
    mock_data_writer.write_data = Mock()
    mock_subsetter = Subsetter()
    mock_subsetter.subset = MagicMock(side_effect=lambda *args: args[0])  # Return the data array unmodified

    subset = Subset(limits, output_file, subsetter=mock_subsetter,
                    data_reader=mock_data_reader, data_writer=mock_data_writer)
    subset.subset(variable, filename, product=None)

    assert_that(mock_subsetter.subset.call_count, is_(1))
    called_data = mock_subsetter.subset.call_args[0][0]
    called_constraint = mock_subsetter.subset.call_args[0][1]
    assert_that(called_data.data.tolist(), is_([[1, 2, 3], [4, 5, 6], [7, 8, 9], [10, 11, 12], [13, 14, 15]]))
    assert_that(called_constraint, instance_of(GriddedSubsetConstraint))
    assert_that(called_constraint._limits['latitude'][1:3], is_((ymin, ymax)))
    assert_that(called_constraint._limits['longitude'][1:3], is_((xmin, xmax)))
def test_wrapping_of_pres_points_on_hybrid_pressure_coordinates_on_0_360_grid(self):
    cube = make_from_cube(mock.make_mock_cube(time_dim_length=3, hybrid_pr_len=10,
                                              lon_dim_length=36, lon_range=(0., 350.)))

    # Ensure the longitude coord is circular
    cube.coord(standard_name='longitude').circular = True

    sample_points = UngriddedData.from_points_array(
        [HyperPoint(lat=0.0, lon=355.0, pres=1482280045.0, t=dt.datetime(1984, 8, 28, 0, 0, 0)),
         HyperPoint(lat=5.0, lon=2.5, pres=1879350048.0, t=dt.datetime(1984, 8, 28, 0, 0, 0))])
    col = GriddedUngriddedCollocator(extrapolate=False)
    new_data = col.collocate(sample_points, cube, None, 'lin')[0]
    eq_(new_data.data[0], 2701.0011131725005)
    eq_(new_data.data[1], 3266.1930161260775)
def test_aggregation_on_three_dimensional_grid_with_time(self):
    self.cube = make_from_cube(make_mock_cube(time_dim_length=7))

    cube_out = self.cube.collapsed(['t', 'x', 'y'], how=self.kernel)

    result_data = numpy.array(53)
    assert_arrays_almost_equal(result_data, cube_out[0].data)
def test_collocation_of_pres_points_on_hybrid_pressure_coordinates(self):
    cube = make_from_cube(mock.make_mock_cube(time_dim_length=3, hybrid_pr_len=10))

    sample_points = UngriddedData.from_points_array(
        [HyperPoint(lat=0.0, lon=0.0, pres=111100040.5, t=dt.datetime(1984, 8, 28, 0, 0, 0)),
         HyperPoint(lat=0.0, lon=0.0, pres=113625040.5, t=dt.datetime(1984, 8, 28, 12, 0, 0)),
         HyperPoint(lat=5.0, lon=2.5, pres=177125044.5, t=dt.datetime(1984, 8, 28, 0, 0, 0)),
         HyperPoint(lat=-4.0, lon=-4.0, pres=166600039.0, t=dt.datetime(1984, 8, 27))])
    col = GriddedUngriddedCollocator()
    new_data = col.collocate(sample_points, cube, None, 'lin')[0]

    # Exactly on the lat, lon, time points, interpolated over pressure
    assert_almost_equal(new_data.data[0], 221.5, decimal=5)
    # Exactly on the lat, lon points, interpolated over time and pressure
    assert_almost_equal(new_data.data[1], 226.5, decimal=7)
    # Exactly on the lat, time points, interpolated over longitude and pressure
    assert_almost_equal(new_data.data[2], 330.5, decimal=7)
    # Outside of the pressure bounds - extrapolation off
    assert np.ma.is_masked(new_data.data[3])
def test_partial_aggregation_over_more_than_one_multidimensional_coord(self):
    self.cube = make_from_cube(make_mock_cube(time_dim_length=7, hybrid_pr_len=5, geopotential_height=True))
    cube_out = self.cube.collapsed(['t', 'x'], how=self.kernel)

    result_data = numpy.array([[51.0, 52.0, 53.0, 54.0, 55.0],
                               [156.0, 157.0, 158.0, 159.0, 160.0],
                               [261.0, 262.0, 263.0, 264.0, 265.0],
                               [366.0, 367.0, 368.0, 369.0, 370.0],
                               [471.0, 472.0, 473.0, 474.0, 475.0]], dtype=float)

    altitude_points = result_data + 9
    surface_air_pressure_points = numpy.array([1000000., 3100000., 5200000., 7300000., 9400000.], dtype=float)

    assert_arrays_almost_equal(cube_out[0].data, result_data)
    assert_arrays_almost_equal(cube_out[0].coord('surface_air_pressure').points, surface_air_pressure_points)
    assert_arrays_almost_equal(cube_out[0].coord('altitude').points, altitude_points)
def test_collapse_vertical_coordinate(self):
    from cis.data_io.gridded_data import GriddedDataList, make_from_cube
    data1 = make_from_cube(make_mock_cube(alt_dim_length=6))
    data2 = make_from_cube(make_mock_cube(alt_dim_length=6, data_offset=1))
    datalist = GriddedDataList([data1, data2])
    cube_out = datalist.collapsed(['z'], how=self.kernel)

    result1 = data1.data.mean(axis=2)
    result2 = result1 + 1

    assert isinstance(cube_out, GriddedDataList)
    # There is a small deviation due to the weighting correction applied by Iris when completely collapsing
    assert_arrays_almost_equal(result1, cube_out[0].data)
    assert_arrays_almost_equal(result2, cube_out[1].data)
    assert numpy.array_equal(data1.coords('latitude')[0].points, cube_out.coords('latitude')[0].points)
def squeeze(data):
    from iris.cube import Cube
    from iris.util import squeeze
    from cis.data_io.gridded_data import make_from_cube
    if isinstance(data, Cube):
        return make_from_cube(squeeze(data))
    else:
        return data
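# A brief usage sketch (assumes the mock helpers imported by the tests above are available)
# showing the two branches of squeeze(): Cube input is squeezed and re-wrapped via
# make_from_cube, while non-cube input is returned unchanged.
def _example_squeeze_usage():
    from cis.data_io.gridded_data import make_from_cube
    from cis.test.util.mock import make_mock_cube

    gridded = make_from_cube(make_mock_cube())
    squeezed = squeeze(gridded)        # Cube branch: length-one dimensions removed
    passthrough = squeeze([1, 2, 3])   # Non-cube branch: returned as-is
    assert passthrough == [1, 2, 3]
    return squeezed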
def test_can_subset_2d_gridded_data_by_time(self):
    data = make_from_cube(cis.test.util.mock.make_square_5x3_2d_cube_with_time())
    subset = data.subset(time=[140494, 140497])
    assert (subset.data.tolist() == [[[3, 4, 5, 6], [10, 11, 12, 13], [17, 18, 19, 20]],
                                     [[24, 25, 26, 27], [31, 32, 33, 34], [38, 39, 40, 41]],
                                     [[45, 46, 47, 48], [52, 53, 54, 55], [59, 60, 61, 62]],
                                     [[66, 67, 68, 69], [73, 74, 75, 76], [80, 81, 82, 83]],
                                     [[87, 88, 89, 90], [94, 95, 96, 97], [101, 102, 103, 104]]])