def test_horizontal_constraint_in_4d(self):
    """Check that h_sep=1000 around (0, 0) keeps the expected 30 values of the 4D mock data."""
    data_points = mock.make_regular_4d_ungridded_data().as_data_frame(time_index=False, name='vals').dropna(axis=1)
    samples = pd.DataFrame(
        data={
            'longitude': [0.0],
            'latitude': [0.0],
            'altitude': [50.0],
            'time': [dt.datetime(1984, 8, 29)]
        })

    # Build the haversine index explicitly (no coordinate map is supplied) and attach it to the constraint.
    kd_index = HaversineDistanceKDTreeIndex()
    kd_index.index_data(samples, data_points, None)
    sep_constraint = SepConstraintKdtree(h_sep=1000)
    sep_constraint.haversine_distance_kd_tree_index = kd_index

    # Expected: the central three columns of the values 1..50 laid out as 10 rows of 5, i.e. 30 values.
    expected = (np.arange(50) + 1.0).reshape((10, 5))[:, 1:4].flatten()
    kept = sep_constraint.constrain_points(samples.iloc[0], data_points)
    actual = np.sort(kept.vals)
    eq_(expected.size, actual.size)
    assert np.equal(expected, actual).all()
def test_horizontal_constraint_in_2d_with_missing_values(self):
    """Check that each valid sample matches only itself and each NaN-valued sample matches nothing."""
    ug_data = mock.make_regular_2d_ungridded_data_with_missing_values()
    valid_rows = ug_data.as_data_frame(time_index=False, name='vals').dropna(axis=0)

    # One degree near (0, 0) is about 110km in latitude and longitude.
    # NOTE(review): h_sep is 400 here - presumably km, i.e. less than 4 degrees each way; confirm the units.
    kd_index = HaversineDistanceKDTreeIndex()
    # The sample points argument is not used when indexing, so None is passed (as is the coordinate map).
    kd_index.index_data(None, valid_rows, None, leafsize=2)
    sep_constraint = SepConstraintKdtree(h_sep=400)
    sep_constraint.haversine_distance_kd_tree_index = kd_index

    # Iterate over every row of the data, including those whose value is missing.
    for _, row in ug_data.as_data_frame(time_index=False, name='vals').iterrows():
        matched_vals = sep_constraint.constrain_points(row, valid_rows).vals
        expected = np.array([]) if np.isnan(row.vals) else np.array([row.vals])
        eq_(expected.size, matched_vals.size)
        assert np.equal(expected, matched_vals).all()
def test_alt_constraint_in_4d(self):
    """Check that an altitude separation of 15 keeps the values 21 to 35 of the 4D mock data."""
    import numpy as np
    import datetime as dt
    from cis.collocation.col_implementations import SepConstraintKdtree

    sample_time = cis_standard_time_unit.date2num(dt.datetime(1984, 8, 29))
    sample = pd.Series({'longitude': [0.0], 'latitude': [0.0], 'altitude': [50.0], 'time': [sample_time]})
    data_points = mock.make_regular_4d_ungridded_data().as_data_frame(time_index=False, name='vals').dropna(axis=1)

    # 15m altitude separation
    sep_constraint = SepConstraintKdtree(a_sep=15)

    # Expected: 15 points, i.e. the three rows [21..25], [26..30] and [31..35].
    expected = np.arange(21.0, 36.0)
    actual = sep_constraint.constrain_points(sample, data_points).vals
    eq_(expected.size, actual.size)
    assert np.equal(expected, actual).all()
def test_all_constraints_in_4d(self):
    """Check that combining all four separation limits keeps the six values 27-29 and 32-34."""
    data_points = mock.make_regular_4d_ungridded_data().get_non_masked_points()
    origin = HyperPoint(lat=0.0, lon=0.0, alt=50.0, pres=50.0, t=dt.datetime(1984, 8, 29))

    separations = dict(
        # One degree near (0, 0) is about 110km in latitude and longitude.
        # NOTE(review): presumably h_sep is in km, so 1000 spans roughly 9 degrees each way; confirm.
        h_sep=1000,
        # 15m altitude separation
        a_sep=15,
        # Pressure constraint is 50/40 < p_sep < 60/50
        p_sep=1.22,
        # 1 day (and a little bit) time separation
        t_sep="P1dT1M",
    )
    sep_constraint = SepConstraintKdtree(**separations)

    kd_index = HaversineDistanceKDTreeIndex()
    kd_index.index_data(None, data_points, None)
    sep_constraint.haversine_distance_kd_tree_index = kd_index

    # Six values are expected.
    # NOTE(review): presumably the pressure limit removes 22-24 from the 3 x 3 block
    # 22-24 / 27-29 / 32-34 - confirm against the mock data.
    expected = np.array([27.0, 28.0, 29.0, 32.0, 33.0, 34.0])
    kept = sep_constraint.constrain_points(origin, data_points)
    actual = np.sort(kept.vals)
    eq_(expected.size, actual.size)
    assert (np.equal(expected, actual).all())
def test_time_constraint_in_4d(self):
    """Check that a time separation of 'P1dT1M' keeps the expected 30 values of the 4D mock data."""
    import numpy as np
    import datetime as dt
    from cis.collocation.col_implementations import SepConstraintKdtree

    data_frame = mock.make_regular_4d_ungridded_data().as_data_frame(time_index=False, name='vals')
    data_points = data_frame.dropna(axis=1)
    sample_time = cis_standard_time_unit.date2num(dt.datetime(1984, 8, 29))
    sample = pd.Series({'longitude': [0.0], 'latitude': [0.0], 'altitude': [50.0], 'time': [sample_time]})

    # 1 day (and a little bit) time separation
    sep_constraint = SepConstraintKdtree(t_sep='P1dT1M')

    # Expected: columns 1..3 of the values 1..50 laid out as 10 rows of 5, i.e. 30 values.
    grid = (np.arange(50) + 1.0).reshape((10, 5))
    expected = grid[:, 1:4].flatten()
    actual = sep_constraint.constrain_points(sample, data_points).vals
    eq_(expected.size, actual.size)
    assert np.equal(expected, actual).all()
def test_pressure_constraint_in_4d(self):
    """Check that a pressure separation of 2 around air_pressure=24 keeps the values 6 to 25."""
    import numpy as np
    import datetime as dt
    from cis.collocation.col_implementations import SepConstraintKdtree

    sample_time = cis_standard_time_unit.date2num(dt.datetime(1984, 8, 29))
    sample = pd.Series({'longitude': [0.0], 'latitude': [0.0], 'altitude': [50.0], 'air_pressure': [24.0],
                        'time': [sample_time]})
    data_points = mock.make_regular_4d_ungridded_data().as_data_frame(time_index=False, name='vals').dropna(axis=1)

    sep_constraint = SepConstraintKdtree(p_sep=2)

    # Expected: 20 points, i.e. the four rows [6..10], [11..15], [16..20] and [21..25].
    expected = np.arange(6.0, 26.0)
    actual = sep_constraint.constrain_points(sample, data_points).vals
    eq_(expected.size, actual.size)
    assert np.equal(expected, actual).all()
def test_all_constraints_in_4d(self):
    """Check that combining all four separation limits keeps the six values 27-29 and 32-34."""
    data_points = mock.make_regular_4d_ungridded_data().as_data_frame(time_index=False, name='vals').dropna(axis=1)
    sample = pd.DataFrame(
        data={
            'longitude': [0.0],
            'latitude': [0.0],
            'altitude': [50.0],
            'air_pressure': [50.0],
            'time': [cis_standard_time_unit.date2num(dt.datetime(1984, 8, 29))]
        }).iloc[0]

    separations = dict(
        # One degree near (0, 0) is about 110km in latitude and longitude.
        # NOTE(review): presumably h_sep is in km; confirm.
        h_sep=1000,
        # 15m altitude separation
        a_sep=15,
        # Pressure constraint is 50/40 < p_sep < 60/50
        p_sep=1.22,
        # 1 day (and a little bit) time separation
        t_sep='P1dT1M',
    )
    sep_constraint = SepConstraintKdtree(**separations)

    kd_index = HaversineDistanceKDTreeIndex()
    kd_index.index_data(None, data_points, None)
    sep_constraint.haversine_distance_kd_tree_index = kd_index

    # Six values are expected.
    # NOTE(review): presumably the pressure limit removes 22-24 from the 3 x 3 block
    # 22-24 / 27-29 / 32-34 - confirm against the mock data.
    expected = np.array([27., 28., 29., 32., 33., 34.])
    actual = np.sort(sep_constraint.constrain_points(sample, data_points).vals)
    eq_(expected.size, actual.size)
    assert np.equal(expected, actual).all()
def test_horizontal_constraint_for_same_3d_grids_returns_original_data(self):
    """Check that identical 3D sample and data grids give two matches (one per time) for every sample point."""

    def build_cube():
        # Cube including a time coordinate, with the dimensions in reverse of the normal order.
        return gridded_data.make_from_cube(
            mock.make_mock_cube(lat_dim_length=5, lon_dim_length=3, time_dim_length=2,
                                dim_order=["time", "lon", "lat"]))

    sample_cube = build_cube()
    data_cube = build_cube()
    data_points = data_cube.get_non_masked_points()
    sample_points = sample_cube.get_all_points()

    kd_index = HaversineDistanceKDTreeIndex()
    kd_index.index_data(sample_points, data_points, make_coord_map(sample_cube, data_cube), leafsize=2)
    # The separation is small enough to include only the corresponding position in the data cube.
    sep_constraint = SepConstraintKdtree(h_sep=400)
    sep_constraint.haversine_distance_kd_tree_index = kd_index

    for position, sample in enumerate(sample_points):
        matches = sep_constraint.constrain_points(sample, data_points)
        # Two times for each spatial position.
        assert len(matches) == 2
        matched_values = [match.val[0] for match in matches]
        assert data_points[position].val[0] in matched_values
def test_horizontal_constraint_in_2d(self):
    """Check that the haversine index finds the four values 10, 11, 13 and 14 within 400 of (7.5, -2.5)."""
    from cis.data_io.ungridded_data import Metadata
    from cis.data_io.Coord import Coord, CoordList

    ug_data = mock.make_regular_2d_ungridded_data()
    data_points = ug_data.as_data_frame(time_index=False, name='vals').dropna(axis=1)

    # A single sample at latitude 7.5, longitude -2.5.
    lat_coord = Coord(np.array([7.5]), Metadata(standard_name='latitude'))
    lon_coord = Coord(np.array([-2.5]), Metadata(standard_name='longitude'))
    sample = UngriddedData(np.array([0.0]), Metadata(), CoordList([lat_coord, lon_coord]))
    sample_frame = sample.as_data_frame(time_index=False, name='vals').dropna(axis=1)

    # One degree near (0, 0) is about 110km in latitude and longitude.
    # NOTE(review): the separation used is 400 - presumably km; confirm.
    kd_index = HaversineDistanceKDTreeIndex()
    kd_index.index_data(sample, data_points, None, leafsize=2)
    sep_constraint = SepConstraintKdtree(h_sep=400)
    sep_constraint.haversine_distance_kd_tree_index = kd_index

    # This should leave us with 4 points
    expected = np.array([10, 11, 13, 14])
    found = sep_constraint.haversine_distance_kd_tree_index.find_points_within_distance_sample(sample_frame, 400)
    actual = ug_data.data.flat[found]
    eq_(expected.size, len(actual[0]))
    assert np.equal(expected, actual).all()
def test_list_ungridded_ungridded_box_mean(self):
    """Check that collocating a two-variable list with the moments kernel gives six outputs with expected values."""
    first = mock.make_regular_2d_ungridded_data()
    second = mock.make_regular_2d_ungridded_data(data_offset=3)
    # Give the second variable its own names so its outputs can be told apart.
    second.long_name = 'TOTAL SNOWFALL RATE: LS+CONV KG/M2/S'
    second.standard_name = 'snowfall_flux'
    second.metadata._name = 'snow'
    both = UngriddedDataList([first, second])

    # The samples are a fresh copy of the same regular 2D mock data.
    samples = mock.make_regular_2d_ungridded_data()
    sep_constraint = SepConstraintKdtree('500km')
    output = GeneralUngriddedCollocator().collocate(samples, both, sep_constraint, moments())

    values = np.arange(1, 16)
    counts = np.array([1] * 15)

    # Three outputs per input variable.
    assert len(output) == 6
    assert isinstance(output, UngriddedDataList)
    for position, var_name in enumerate(['snow', 'snow_std_dev', 'snow_num_points'], start=3):
        assert output[position].var_name == var_name
    # For each variable the standard deviation output is expected to be fully masked.
    assert np.allclose(output[0].data, values)
    assert all(output[1].data.mask)
    assert np.allclose(output[2].data, counts)
    assert np.allclose(output[3].data, values + 3)
    assert all(output[4].data.mask)
    assert np.allclose(output[5].data, counts)
def test_coordinates_exactly_between_points_in_col_ungridded_to_ungridded_in_2d(self):
    """
        This works out the edge case where the sample points are exactly in the middle of two or more
        datapoints. The nn_horizontal algorithm will start with the first point as the nearest and iterate
        through the points, finding any which are closer than the current closest. If two distances were
        exactly the same you would expect the first point to be chosen. This doesn't seem to always be the
        case, but that is probably down to floating point errors in the haversine calculation as these test
        points are pretty close together. This test is only really for documenting the behaviour for
        equidistant points.
    """
    from cis.collocation.col_implementations import GeneralUngriddedCollocator, nn_horizontal, SepConstraintKdtree

    corners = [(2.5, 2.5), (-2.5, 2.5), (2.5, -2.5), (-2.5, -2.5)]
    samples = UngriddedData.from_points_array([HyperPoint(first, second) for first, second in corners])
    data = mock.make_regular_2d_ungridded_data()

    collocator = GeneralUngriddedCollocator()
    result = collocator.collocate(samples, data, SepConstraintKdtree(), nn_horizontal())[0]
    for position, expected in enumerate([11.0, 5.0, 10.0, 4.0]):
        eq_(result.data[position], expected)
def test_basic_col_in_4d(self):
    """Check the values picked by the nn_altitude kernel for three 4D sample points."""
    import datetime as dt
    from cis.collocation.col_implementations import GeneralUngriddedCollocator, nn_altitude, SepConstraintKdtree

    # (lat, lon, alt, time) of each sample.
    sample_specs = [
        (1.0, 1.0, 12.0, dt.datetime(1984, 8, 29, 8, 34)),
        (4.0, 4.0, 34.0, dt.datetime(1984, 9, 2, 1, 23)),
        (-4.0, -4.0, 89.0, dt.datetime(1984, 9, 4, 15, 54)),
    ]
    points = HyperPointList()
    for lat, lon, alt, when in sample_specs:
        points.append(HyperPoint(lat=lat, lon=lon, alt=alt, t=when))
    samples = UngriddedData.from_points_array(points)

    data = mock.make_regular_4d_ungridded_data()
    result = GeneralUngriddedCollocator().collocate(samples, data, SepConstraintKdtree(), nn_altitude())[0]
    for position, expected in enumerate([6.0, 16.0, 46.0]):
        eq_(result.data[position], expected)
def test_coordinates_outside_grid_in_col_ungridded_to_ungridded_in_2d(self):
    """Check the values picked by the nn_pressure kernel for sample pressures of 0.1, 91.0 and 890.0."""
    import datetime as dt
    from cis.collocation.col_implementations import GeneralUngriddedCollocator, nn_pressure, SepConstraintKdtree

    # (pressure, time) of each sample; all samples sit at lat=0, lon=0.
    sample_specs = [
        (0.1, dt.datetime(1984, 8, 29, 8, 34)),
        (91.0, dt.datetime(1984, 9, 2, 1, 23)),
        (890.0, dt.datetime(1984, 9, 4, 15, 54)),
    ]
    points = HyperPointList()
    for pressure, when in sample_specs:
        points.append(HyperPoint(lat=0.0, lon=0.0, pres=pressure, t=when))
    samples = UngriddedData.from_points_array(points)

    data = mock.make_regular_4d_ungridded_data()
    result = GeneralUngriddedCollocator().collocate(samples, data, SepConstraintKdtree(), nn_pressure())[0]
    for position, expected in enumerate([1.0, 46.0, 46.0]):
        eq_(result.data[position], expected)
def test_already_collocated_in_col_ungridded_to_ungridded_in_2d(self):
    """Check that a sample placed at (0, 0) collocates to the value 8 with nn_horizontal_only."""
    collocator = GeneralUngriddedCollocator(fill_value=-999)
    # This point is meant to coincide with an existing data point (the value asserted below is 8),
    # which shouldn't be a problem.
    sample = UngriddedData.from_points_array([HyperPoint(0.0, 0.0)])
    data = mock.make_regular_2d_ungridded_data()

    result = collocator.collocate(sample, data, SepConstraintKdtree(), nn_horizontal_only())
    eq_(result[0].data[0], 8.0)
def test_pressure_constraint_in_4d(self):
    """Check that a pressure separation of 2 keeps the values 6 to 25 of the 4D mock data."""
    import numpy as np
    import datetime as dt
    from cis.collocation.col_implementations import SepConstraintKdtree

    data_points = mock.make_regular_4d_ungridded_data().get_non_masked_points()
    # Arguments are passed positionally, exactly as the point is constructed elsewhere in these tests.
    sample = HyperPoint(0.0, 0.0, 50.0, 24.0, dt.datetime(1984, 8, 29))
    sep_constraint = SepConstraintKdtree(p_sep=2)

    # Expected: 20 points, i.e. the four rows [6..10], [11..15], [16..20] and [21..25].
    expected = np.arange(6.0, 26.0)
    actual = sep_constraint.constrain_points(sample, data_points).vals
    eq_(expected.size, actual.size)
    assert (np.equal(expected, actual).all())
def test_all_constraints_in_4d(self):
    """Check that combining all four separation limits keeps the six values 27-29 and 32-34."""
    ug_data = mock.make_regular_4d_ungridded_data()
    data_points = ug_data.as_data_frame(time_index=False, name='vals').dropna(axis=1)

    sample_time = cis_standard_time_unit.date2num(dt.datetime(1984, 8, 29))
    sample_columns = {'longitude': [0.0], 'latitude': [0.0], 'altitude': [50.0], 'air_pressure': [50.0],
                      'time': [sample_time]}
    sample = pd.DataFrame(data=sample_columns).iloc[0]

    # h_sep: one degree near (0, 0) is about 110km (NOTE(review): presumably h_sep is in km; confirm).
    # a_sep: 15m altitude separation.
    # p_sep: pressure constraint is 50/40 < p_sep < 60/50.
    # t_sep: 1 day (and a little bit) time separation.
    sep_constraint = SepConstraintKdtree(h_sep=1000, a_sep=15, p_sep=1.22, t_sep='P1dT1M')

    kd_index = HaversineDistanceKDTreeIndex()
    kd_index.index_data(None, data_points, None)
    sep_constraint.haversine_distance_kd_tree_index = kd_index

    # Six values are expected.
    # NOTE(review): presumably the pressure limit removes 22-24 from the 3 x 3 block
    # 22-24 / 27-29 / 32-34 - confirm against the mock data.
    expected = np.array([27., 28., 29., 32., 33., 34.])
    kept = sep_constraint.constrain_points(sample, data_points)
    actual = np.sort(kept.vals)
    eq_(expected.size, actual.size)
    assert np.equal(expected, actual).all()
def test_time_constraint_in_4d(self):
    """Check that a time separation of "P1dT1M" keeps the expected 30 values of the 4D mock data."""
    import numpy as np
    import datetime as dt
    from cis.collocation.col_implementations import SepConstraintKdtree

    data_points = mock.make_regular_4d_ungridded_data().get_non_masked_points()
    sample = HyperPoint(lat=0.0, lon=0.0, alt=50.0, t=dt.datetime(1984, 8, 29))

    # 1 day (and a little bit) time separation
    sep_constraint = SepConstraintKdtree(t_sep="P1dT1M")

    # Expected: columns 1..3 of the values 1..50 laid out as 10 rows of 5, i.e. 30 values.
    grid = (np.arange(50) + 1.0).reshape((10, 5))
    expected = grid[:, 1:4].flatten()
    actual = sep_constraint.constrain_points(sample, data_points).vals
    eq_(expected.size, actual.size)
    assert (np.equal(expected, actual).all())
def test_time_constraint_in_4d(self):
    """Check that a time separation of 'P1dT1M' keeps the expected 30 values of the 4D mock data."""
    import numpy as np
    import datetime as dt
    from cis.collocation.col_implementations import SepConstraintKdtree

    sample = pd.Series({
        'longitude': [0.0],
        'latitude': [0.0],
        'altitude': [50.0],
        'time': [cis_standard_time_unit.date2num(dt.datetime(1984, 8, 29))]
    })
    ug_data = mock.make_regular_4d_ungridded_data()
    data_points = ug_data.as_data_frame(time_index=False, name='vals').dropna(axis=1)

    # 1 day (and a little bit) time separation
    sep_constraint = SepConstraintKdtree(t_sep='P1dT1M')

    # This should leave us with 30 points: the middle three columns of a 10 x 5 layout of 1..50.
    expected = (np.arange(50) + 1.0).reshape((10, 5))[:, 1:4].flatten()
    kept = sep_constraint.constrain_points(sample, data_points)
    eq_(expected.size, kept.vals.size)
    assert np.equal(expected, kept.vals).all()
def test_horizontal_constraint_for_same_2d_grids_returns_original_data(self):
    """Check that identical 2D sample and data grids give exactly the corresponding data point per sample."""
    # Simple case of lat/lon grid with dimensions in that order.
    sample_cube = gridded_data.make_from_cube(mock.make_mock_cube())
    data_cube = gridded_data.make_from_cube(mock.make_mock_cube())
    data_points = data_cube.get_non_masked_points()
    sample_points = sample_cube.get_all_points()

    kd_index = HaversineDistanceKDTreeIndex()
    kd_index.index_data(sample_points, data_points, make_coord_map(sample_cube, data_cube), leafsize=2)
    # The separation is small enough to include only the corresponding point in the data cube.
    sep_constraint = SepConstraintKdtree(h_sep=400)
    sep_constraint.haversine_distance_kd_tree_index = kd_index

    for position, sample in enumerate(sample_points):
        matches = sep_constraint.constrain_points(sample, data_points)
        assert len(matches) == 1
        only_match = matches[0]
        assert only_match.val[0] == data_points[position].val[0]
def test_horizontal_constraint_in_4d(self):
    """Check that h_sep=1000 around (0, 0) keeps the expected 30 values of the 4D mock data."""
    data_points = mock.make_regular_4d_ungridded_data().get_non_masked_points()
    origin = HyperPoint(lat=0.0, lon=0.0, alt=50.0, t=dt.datetime(1984, 8, 29))
    samples = HyperPointList([origin])

    # Build the haversine index explicitly (no coordinate map is supplied) and attach it to the constraint.
    kd_index = HaversineDistanceKDTreeIndex()
    kd_index.index_data(samples, data_points, None)
    # Constraint distance selects the central three points.
    sep_constraint = SepConstraintKdtree(h_sep=1000)
    sep_constraint.haversine_distance_kd_tree_index = kd_index

    # Expected: the central three columns of the values 1..50 laid out as 10 rows of 5, i.e. 30 values.
    grid = (np.arange(50) + 1.0).reshape((10, 5))
    expected = grid[:, 1:4].flatten()
    actual = np.sort(sep_constraint.constrain_points(origin, data_points).vals)
    eq_(expected.size, actual.size)
    assert (np.equal(expected, actual).all())
def test_horizontal_constraint_in_4d(self):
    """Check that h_sep=1000 around (0, 0) keeps the expected 30 values of the 4D mock data."""
    ug_data = mock.make_regular_4d_ungridded_data()
    data_points = ug_data.as_data_frame(time_index=False, name='vals').dropna(axis=1)
    sample_columns = {
        'longitude': [0.0],
        'latitude': [0.0],
        'altitude': [50.0],
        'time': [dt.datetime(1984, 8, 29)],
    }
    samples = pd.DataFrame(data=sample_columns)

    # Constraint distance selects the central three points.
    sep_constraint = SepConstraintKdtree(h_sep=1000)
    kd_index = HaversineDistanceKDTreeIndex()
    # No coordinate map is supplied.
    kd_index.index_data(samples, data_points, None)
    sep_constraint.haversine_distance_kd_tree_index = kd_index

    # This should leave us with 30 points: the middle three columns of a 10 x 5 layout of 1..50.
    expected = (np.arange(50) + 1.0).reshape((10, 5))[:, 1:4].flatten()
    first_sample = samples.iloc[0]
    actual = np.sort(sep_constraint.constrain_points(first_sample, data_points).vals)
    eq_(expected.size, actual.size)
    assert np.equal(expected, actual).all()
def test_horizontal_constraint_in_2d(self):
    """Check that h_sep=400 around (lat 7.5, lon -2.5) keeps the four values 10, 11, 13 and 14."""
    data_points = mock.make_regular_2d_ungridded_data().get_non_masked_points()
    sample = HyperPoint(lat=7.5, lon=-2.5)

    # One degree near (0, 0) is about 110km in latitude and longitude.
    # NOTE(review): h_sep is 400 here - presumably km; confirm the units.
    kd_index = HaversineDistanceKDTreeIndex()
    kd_index.index_data(HyperPointList([sample]), data_points, None, leafsize=2)
    sep_constraint = SepConstraintKdtree(h_sep=400)
    sep_constraint.haversine_distance_kd_tree_index = kd_index

    # This should leave us with 4 points
    expected = np.array([10, 11, 13, 14])
    actual = sep_constraint.constrain_points(sample, data_points).vals
    eq_(expected.size, actual.size)
    assert (np.equal(expected, actual).all())
def test_basic_col_in_2d(self):
    """Check the values picked by nn_horizontal_only for three 2D sample points."""
    # Data grid - lat: -10 to 10 step 5; lon -5 to 5 step 5
    data = mock.make_regular_2d_ungridded_data()
    positions = [(1.0, 1.0), (4.0, 4.0), (-4.0, -4.0)]
    samples = UngriddedData.from_points_array([HyperPoint(lat=lat, lon=lon) for lat, lon in positions])

    collocator = GeneralUngriddedCollocator(fill_value=-999)
    result = collocator.collocate(samples, data, SepConstraintKdtree(), nn_horizontal_only())[0]
    for position, expected in enumerate([8.0, 12.0, 4.0]):
        eq_(result.data[position], expected)
def test_basic_col_with_incompatible_points_throws_a_TypeError(self):
    """Check that collocating samples without a time coordinate onto 4D data with nn_pressure raises.

    NOTE(review): the method name mentions TypeError but the exception asserted is AttributeError. The name is
    left unchanged so that existing test selections keep working; confirm which exception is intended.
    """
    from cis.collocation.col_implementations import GeneralUngriddedCollocator, nn_pressure, SepConstraintKdtree

    ug_data = mock.make_regular_4d_ungridded_data()
    # Make sample points with no time dimension specified
    sample_points = UngriddedData.from_points_array([
        HyperPoint(1.0, 1.0),
        HyperPoint(4.0, 4.0),
        HyperPoint(-4.0, -4.0)
    ])
    col = GeneralUngriddedCollocator()
    with self.assertRaises(AttributeError):
        # Only the raised exception matters, so the result is not bound to a name. The [0] lookup is
        # kept so that exactly the same expression as before is evaluated.
        col.collocate(sample_points, ug_data, SepConstraintKdtree(), nn_pressure())[0]
def test_pressure_constraint_in_4d(self):
    """Check that a pressure separation of 2 around air_pressure=24 keeps the values 6 to 25."""
    import numpy as np
    import datetime as dt
    from cis.collocation.col_implementations import SepConstraintKdtree

    sample = pd.Series({
        'longitude': [0.0],
        'latitude': [0.0],
        'altitude': [50.0],
        'air_pressure': [24.0],
        'time': [cis_standard_time_unit.date2num(dt.datetime(1984, 8, 29))]
    })
    ug_data = mock.make_regular_4d_ungridded_data()
    data_points = ug_data.as_data_frame(time_index=False, name='vals').dropna(axis=1)
    sep_constraint = SepConstraintKdtree(p_sep=2)

    # This should leave us with 20 points: [  6.   7.   8.   9.  10.]
    #                                      [ 11.  12.  13.  14.  15.]
    #                                      [ 16.  17.  18.  19.  20.]
    #                                      [ 21.  22.  23.  24.  25.]
    expected = np.arange(6.0, 26.0)
    kept = sep_constraint.constrain_points(sample, data_points)
    eq_(expected.size, kept.vals.size)
    assert np.equal(expected, kept.vals).all()
def test_alt_constraint_in_4d(self):
    """Check that an altitude separation of 15 keeps the values 21 to 35 of the 4D mock data."""
    import numpy as np
    import datetime as dt
    from cis.collocation.col_implementations import SepConstraintKdtree

    data_points = mock.make_regular_4d_ungridded_data().get_non_masked_points()
    sample = HyperPoint(lat=0.0, lon=0.0, alt=50.0, t=dt.datetime(1984, 8, 29))

    # 15m altitude separation
    sep_constraint = SepConstraintKdtree(a_sep=15)

    # Expected: 15 points, i.e. the three rows [21..25], [26..30] and [31..35].
    expected = np.arange(21.0, 36.0)
    actual = sep_constraint.constrain_points(sample, data_points).vals
    eq_(expected.size, actual.size)
    assert (np.equal(expected, actual).all())
def test_basic_col_in_2d(self):
    """Check the values picked by the nn_horizontal kernel for three 2D sample points."""
    from cis.collocation.col_implementations import GeneralUngriddedCollocator, nn_horizontal, SepConstraintKdtree

    data = mock.make_regular_2d_ungridded_data()
    positions = [(1.0, 1.0), (4.0, 4.0), (-4.0, -4.0)]
    samples = UngriddedData.from_points_array([HyperPoint(lat=lat, lon=lon) for lat, lon in positions])

    collocator = GeneralUngriddedCollocator()
    result = collocator.collocate(samples, data, SepConstraintKdtree(), nn_horizontal())[0]
    for position, expected in enumerate([8.0, 12.0, 4.0]):
        eq_(result.data[position], expected)
def test_already_collocated_in_col_ungridded_to_ungridded_in_2d(self):
    """Check that nn_pressure picks the value 41 for a sample at pressure 80 on 1984-09-04 15:54."""
    import datetime as dt
    from cis.collocation.col_implementations import GeneralUngriddedCollocator, nn_pressure, SepConstraintKdtree

    sample_time = dt.datetime(1984, 9, 4, 15, 54)
    sample = HyperPoint(lat=0.0, lon=0.0, pres=80.0, t=sample_time)
    samples = UngriddedData.from_points_array([sample])
    data = mock.make_regular_4d_ungridded_data()

    collocator = GeneralUngriddedCollocator()
    result = collocator.collocate(samples, data, SepConstraintKdtree(), nn_pressure())
    eq_(result[0].data[0], 41.0)
def test_horizontal_constraint_in_2d(self):
    """Check that the haversine index finds the four values 10, 11, 13 and 14 within 400 of (7.5, -2.5)."""
    from cis.data_io.ungridded_data import Metadata
    from cis.data_io.Coord import Coord, CoordList

    ug_data = mock.make_regular_2d_ungridded_data()
    data_points = ug_data.as_data_frame(time_index=False, name='vals').dropna(axis=1)

    # A single sample at latitude 7.5, longitude -2.5, with a data value of 0.
    coords = CoordList([Coord(np.array([7.5]), Metadata(standard_name='latitude')),
                        Coord(np.array([-2.5]), Metadata(standard_name='longitude'))])
    sample = UngriddedData(np.array([0.0]), Metadata(), coords)
    sample_frame = sample.as_data_frame(time_index=False, name='vals').dropna(axis=1)

    # One degree near (0, 0) is about 110km in latitude and longitude.
    # NOTE(review): the separation used is 400 - presumably km; confirm.
    separation = SepConstraintKdtree(h_sep=400)
    kd_index = HaversineDistanceKDTreeIndex()
    kd_index.index_data(sample, data_points, None, leafsize=2)
    separation.haversine_distance_kd_tree_index = kd_index

    # This should leave us with 4 points
    expected = np.array([10, 11, 13, 14])
    attached_index = separation.haversine_distance_kd_tree_index
    found = attached_index.find_points_within_distance_sample(sample_frame, 400)
    actual = ug_data.data.flat[found]
    eq_(expected.size, len(actual[0]))
    assert np.equal(expected, actual).all()
def test_horizontal_constraint_in_2d_with_missing_values(self):
    """Check that each unmasked sample matches only itself and each masked sample matches nothing."""
    ug_data = mock.make_regular_2d_ungridded_data_with_missing_values()
    valid_points = ug_data.get_non_masked_points()

    # One degree near (0, 0) is about 110km in latitude and longitude.
    # NOTE(review): h_sep is 400 here - presumably km; confirm the units.
    kd_index = HaversineDistanceKDTreeIndex()
    # The sample points argument is not used when indexing, so None is passed (as is the coordinate map).
    kd_index.index_data(None, valid_points, None, leafsize=2)
    sep_constraint = SepConstraintKdtree(h_sep=400)
    sep_constraint.haversine_distance_kd_tree_index = kd_index

    # Iterate over every point of the data, including the masked ones.
    for sample in ug_data.get_all_points():
        matched_vals = sep_constraint.constrain_points(sample, valid_points).vals
        expected = np.array([sample.val]) if sample.val[0] is not np.ma.masked else np.array([])
        eq_(expected.size, matched_vals.size)
        assert (np.equal(expected, matched_vals).all())
def test_alt_constraint_in_4d(self):
    """Check that an altitude separation of 15 keeps the values 21 to 35 of the 4D mock data."""
    import numpy as np
    import datetime as dt
    from cis.collocation.col_implementations import SepConstraintKdtree

    sample = pd.Series({
        'longitude': [0.0],
        'latitude': [0.0],
        'altitude': [50.0],
        'time': [cis_standard_time_unit.date2num(dt.datetime(1984, 8, 29))]
    })
    ug_data = mock.make_regular_4d_ungridded_data()
    data_points = ug_data.as_data_frame(time_index=False, name='vals').dropna(axis=1)

    # 15m altitude separation
    altitude_limit = 15
    sep_constraint = SepConstraintKdtree(a_sep=altitude_limit)

    # This should leave us with 15 points: [ 21.  22.  23.  24.  25.]
    #                                      [ 26.  27.  28.  29.  30.]
    #                                      [ 31.  32.  33.  34.  35.]
    expected = np.arange(21.0, 36.0)
    kept = sep_constraint.constrain_points(sample, data_points)
    eq_(expected.size, kept.vals.size)
    assert np.equal(expected, kept.vals).all()
def test_horizontal_constraint_in_2d_with_missing_values(self):
    """Check the horizontal constraint on standard 2D grids when the data cube has missing values."""
    sample_cube = gridded_data.make_from_cube(mock.make_mock_cube())
    data_cube = gridded_data.make_from_cube(mock.make_square_5x3_2d_cube_with_missing_data())
    data_points = data_cube.get_non_masked_points()
    sample_points = sample_cube.get_all_points()

    kd_index = HaversineDistanceKDTreeIndex()
    kd_index.index_data(sample_points, data_points, make_coord_map(sample_cube, data_cube), leafsize=2)
    # The separation is small enough to include only the corresponding point in the data cube.
    sep_constraint = SepConstraintKdtree(h_sep=400)
    sep_constraint.haversine_distance_kd_tree_index = kd_index

    for position, sample in enumerate(sample_points):
        matches = sep_constraint.constrain_points(sample, data_points)
        data_value = data_points[position].val[0]
        if data_value is np.ma.masked:
            # A masked data value must produce no match at all.
            assert len(matches) == 0
            continue
        assert len(matches) == 1
        assert matches[0].val[0] == data_value
def test_coordinates_outside_grid_in_col_ungridded_to_ungridded_in_2d(self):
    """Check the values picked by nn_horizontal_only for four samples at (+/-5.5, +/-5.5)."""
    corners = [(5.5, 5.5), (-5.5, 5.5), (5.5, -5.5), (-5.5, -5.5)]
    samples = UngriddedData.from_points_array([HyperPoint(first, second) for first, second in corners])
    data = mock.make_regular_2d_ungridded_data()

    collocator = GeneralUngriddedCollocator(fill_value=-999)
    result = collocator.collocate(samples, data, SepConstraintKdtree(), nn_horizontal_only())[0]
    for position, expected in enumerate([12.0, 6.0, 10.0, 4.0]):
        eq_(result.data[position], expected)
def test_basic_col_in_4d(self):
    """Check that the mean kernel with an unbounded constraint gives 25.5 for the 4D mock data."""
    import datetime as dt
    from cis.collocation.col_implementations import GeneralUngriddedCollocator, mean, SepConstraintKdtree

    # Note - the sample position isn't actually used for averaging
    sample = HyperPoint(lat=1.0, lon=1.0, alt=12.0, t=dt.datetime(1984, 8, 29, 8, 34))
    samples = UngriddedData.from_points_array([sample])
    data = mock.make_regular_4d_ungridded_data()

    collocator = GeneralUngriddedCollocator()
    result = collocator.collocate(samples, data, SepConstraintKdtree(), mean())
    eq_(result[0].data[0], 25.5)
def test_already_collocated_in_col_ungridded_to_ungridded_in_2d(self):
    """Check that nn_time returns the values 1..15 for 15 daily samples starting on 1984-08-27."""
    import numpy as np
    import datetime as dt
    from cis.collocation.col_implementations import GeneralUngriddedCollocator, nn_time, SepConstraintKdtree

    data = mock.make_regular_2d_with_time_ungridded_data()

    # One sample per day at lat=0, lon=0, for 15 consecutive days.
    first_day = dt.datetime(1984, 8, 27)
    points = HyperPointList()
    for day in range(15):
        when = first_day + dt.timedelta(days=day)
        points.append(HyperPoint(lat=0.0, lon=0.0, t=when))
    samples = UngriddedData.from_points_array(points)

    collocator = GeneralUngriddedCollocator()
    result = collocator.collocate(samples, data, SepConstraintKdtree(), nn_time())[0]
    expected = np.arange(15) + 1.0
    assert np.equal(result.data, expected).all()
def test_averaging_basic_col_in_4d(self):
    """Check the names and coordinates of the three outputs of the moments kernel on the 4D mock data."""
    # Note - the sample position isn't actually used for averaging
    sample = HyperPoint(lat=1.0, lon=1.0, alt=12.0, t=dt.datetime(1984, 8, 29, 8, 34))
    samples = UngriddedData.from_points_array([sample])
    data = mock.make_regular_4d_ungridded_data()

    output = GeneralUngriddedCollocator().collocate(samples, data, SepConstraintKdtree(), moments())
    # Outputs in order: mean, standard deviation, number of points.
    means, std_dev, no_points = output[0], output[1], output[2]

    expected_names = [
        (means, 'rainfall_flux'),
        (std_dev, 'Corrected sample standard deviation of TOTAL RAINFALL RATE: LS+CONV KG/M2/S'),
        (no_points, 'Number of points used to calculate the mean of TOTAL RAINFALL RATE: LS+CONV KG/M2/S'),
    ]
    for variable, expected_name in expected_names:
        eq_(variable.name(), expected_name)
    # Every output must carry a non-empty set of coordinates.
    for variable in (means, std_dev, no_points):
        assert variable.coords()
def test_coordinates_exactly_between_points_in_col_ungridded_to_ungridded_in_2d(self):
    """
        This works out the edge case where the sample point is exactly in the middle of two or more
        datapoints. The nn_time algorithm will start with the first point as the nearest and iterate through
        the points, finding any which are closer than the current closest. If two distances were exactly the
        same, the first point is expected to be chosen.
    """
    import datetime as dt
    from cis.collocation.col_implementations import GeneralUngriddedCollocator, nn_time, SepConstraintKdtree

    # Choose a time at midday
    midday = dt.datetime(1984, 8, 29, 12)
    samples = UngriddedData.from_points_array([HyperPoint(lat=0.0, lon=0.0, t=midday)])
    data = mock.make_regular_2d_with_time_ungridded_data()

    collocator = GeneralUngriddedCollocator()
    result = collocator.collocate(samples, data, SepConstraintKdtree(), nn_time())
    eq_(result[0].data[0], 3.0)
def test_ungridded_ungridded_box_moments_no_missing_data_for_missing_sample(self):
    """
    With missing_data_for_missing_sample=False, a masked sample value must not lead to masked output.

    The second of three sample points is masked. The mock 2D data are collocated onto the sample using
    SepConstraintKdtree('500km') and the moments kernel, and none of the three outputs may contain any
    masked values.
    """
    data = mock.make_regular_2d_ungridded_data()
    sample = UngriddedData.from_points_array(
        [HyperPoint(lat=1.0, lon=1.0, alt=12.0, t=dt.datetime(1984, 8, 29, 8, 34)),
         HyperPoint(lat=3.0, lon=3.0, alt=7.0, t=dt.datetime(1984, 8, 29, 8, 34)),
         HyperPoint(lat=-1.0, lon=-1.0, alt=5.0, t=dt.datetime(1984, 8, 29, 8, 34))])
    constraint = SepConstraintKdtree('500km')
    kernel = moments()

    # Mask out the middle sample point only
    sample_mask = [False, True, False]
    sample.data = np.ma.array([0, 0, 0], mask=sample_mask)

    col = GeneralUngriddedCollocator(missing_data_for_missing_sample=False)
    output = col.collocate(sample, data, constraint, kernel)

    assert len(output) == 3
    assert isinstance(output, UngriddedDataList)
    # np.ma.getmaskarray always yields a full boolean array, so these checks also hold when an output's mask
    # is the scalar np.ma.nomask, which the builtin any() cannot iterate over.
    assert not np.ma.getmaskarray(output[0].data).any()
    assert not np.ma.getmaskarray(output[1].data).any()
    assert not np.ma.getmaskarray(output[2].data).any()
def test_coordinates_outside_grid_in_col_ungridded_to_ungridded_in_2d(self):
    """
    Collocate three samples at lat/lon (0, 0) with times 1984-08-26, 1884-08-26 and 1994-08-27 onto the mock
    2D-with-time data using nn_time, and check that they pick up the values 1.0, 1.0 and 15.0 respectively.
    """
    import datetime as dt
    from cis.collocation.col_implementations import GeneralUngriddedCollocator, nn_time, SepConstraintKdtree

    data = mock.make_regular_2d_with_time_ungridded_data()

    sample_times = (
        dt.datetime(1984, 8, 26),
        dt.datetime(1884, 8, 26),
        dt.datetime(1994, 8, 27),
    )
    points = HyperPointList()
    for sample_time in sample_times:
        points.append(HyperPoint(lat=0.0, lon=0.0, t=sample_time))
    sample = UngriddedData.from_points_array(points)

    collocator = GeneralUngriddedCollocator()
    collocated = collocator.collocate(sample, data, SepConstraintKdtree(), nn_time())[0]

    # Expected value for each sample, in the order the samples were added
    for position, expected_value in enumerate((1.0, 1.0, 15.0)):
        eq_(collocated.data[position], expected_value)
def test_ungridded_ungridded_box_moments(self):
    """
    Collocate the mock 2D ungridded data onto three sample points using SepConstraintKdtree('500km') and the
    moments kernel, and compare the three outputs against the expected means, standard deviations and
    point counts.
    """
    data = mock.make_regular_2d_ungridded_data()

    sample_time = dt.datetime(1984, 8, 29, 8, 34)
    sample = UngriddedData.from_points_array([
        HyperPoint(lat=1.0, lon=1.0, alt=12.0, t=sample_time),
        HyperPoint(lat=3.0, lon=3.0, alt=7.0, t=sample_time),
        HyperPoint(lat=-1.0, lon=-1.0, alt=5.0, t=sample_time),
    ])

    box_constraint = SepConstraintKdtree('500km')
    moments_kernel = moments()
    collocator = GeneralUngriddedCollocator()
    output = collocator.collocate(sample, data, box_constraint, moments_kernel)

    # Expected arrays, in output order: result, standard deviation, number of points
    expected_arrays = (
        np.array([28.0/3, 10.0, 20.0/3]),
        np.array([1.52752523, 1.82574186, 1.52752523]),
        np.array([3, 4, 3]),
    )

    assert len(output) == 3
    assert isinstance(output, UngriddedDataList)
    for position, expected_values in enumerate(expected_arrays):
        assert np.allclose(output[position].data, expected_values)
def test_basic_col_with_time(self):
    """
    Collocate four samples at lat/lon (0, 0), each with a numeric time value, onto the mock MODIS time steps
    using nn_time, and check that the collocated values are 0.0, 1.0, 2.0 and 3.0 in order.
    """
    import numpy as np
    from cis.collocation.col_implementations import GeneralUngriddedCollocator, nn_time, SepConstraintKdtree

    data = mock.make_MODIS_time_steps()
    expected = np.array([0.0, 1.0, 2.0, 3.0])

    sample_times = (149751.369618055, 149759.378055556, 149766.373969907, 149776.375995371)
    points = HyperPointList()
    for sample_time in sample_times:
        points.append(HyperPoint(lat=0.0, lon=0.0, t=sample_time))
    sample = UngriddedData.from_points_array(points)

    collocator = GeneralUngriddedCollocator()
    collocated = collocator.collocate(sample, data, SepConstraintKdtree(), nn_time())[0]

    assert np.equal(collocated.data, expected).all()
def test_basic_col_in_4d_with_pressure_not_altitude(self):
    """
    Collocate the mock 4D ungridded data onto a single sample point that carries a pressure (pres) value
    rather than an altitude, using the moments kernel, and check the mean (25.5), the standard deviation
    (sqrt(212.5)) and the number of points (50).
    """
    import datetime as dt
    from cis.collocation.col_implementations import GeneralUngriddedCollocator, moments, SepConstraintKdtree

    data = mock.make_regular_4d_ungridded_data()

    # Note - This isn't actually used for averaging
    sample_point = HyperPoint(lat=1.0, lon=1.0, pres=12.0, t=dt.datetime(1984, 8, 29, 8, 34))
    sample = UngriddedData.from_points_array([sample_point])

    collocator = GeneralUngriddedCollocator()
    results = collocator.collocate(sample, data, SepConstraintKdtree(), moments())
    means, std_dev, no_points = results[0], results[1], results[2]

    eq_(means.data[0], 25.5)
    assert_almost_equal(std_dev.data[0], np.sqrt(212.5))
    eq_(no_points.data[0], 50)
def test_horizontal_constraint_in_2d_when_lats_are_the_same_produces_a_balanced_tree(self):
    """
    Index mock 2D data whose latitudes are all identical (lat_min == lat_max == 10, 1001 latitude points)
    with a HaversineDistanceKDTreeIndex using leafsize=2, and check that the depth reported by
    self.get_max_depth for the resulting tree is 2.
    """
    ug_data = mock.make_regular_2d_ungridded_data(lat_dim_length=1001, lat_max=10, lat_min=10)
    ug_data_points = ug_data.as_data_frame(time_index=False, name='vals').dropna(axis=1)
    sample_points = pd.DataFrame(data={'longitude': [-2.5], 'latitude': [7.5]})
    coord_map = None

    # Only the index structure is under test here, so no separation constraint is needed.
    index = HaversineDistanceKDTreeIndex()
    index.index_data(sample_points, ug_data_points, coord_map, leafsize=2)

    depth = self.get_max_depth(index.index.tree, 0)
    assert_that(depth, is_(2), "Depth is 2, there are three unique values -10, 0, 10")