def test_aggregation_two_dims_using_moments_kernel(self):
    """
    Aggregate a two-element list onto a 2x2 grid using the moments kernel.

    Every input dataset is expected to produce three gridded outputs (mean, standard deviation and number of
    points), so six results are checked in total, both by variable name and by value.
    """
    self.kernel = moments()

    rain = make_regular_2d_ungridded_data_with_missing_values()
    snow = make_regular_2d_ungridded_data_with_missing_values()
    snow.metadata._name = 'snow'
    snow._data += 10
    datasets = UngriddedDataList([rain, snow])

    grid = {'y': slice(-12.5, 12.5, 15), 'x': slice(-7.5, 7.5, 10)}
    output = datasets.aggregate(how=self.kernel, **grid)

    expect_mean = numpy.array([[4.4, 4.5], [35.0 / 3, 13.5]])
    expect_stddev = numpy.array([[numpy.sqrt(9.3), numpy.sqrt(4.5)],
                                 [numpy.sqrt(13.0 / 3), numpy.sqrt(4.5)]])
    expect_count = numpy.array([[5, 2], [3, 2]])

    assert isinstance(output, GriddedDataList)
    assert len(output) == 6

    # The outputs are ordered dataset by dataset: mean, std dev, count.
    expected_names = ['rain', 'rain_std_dev', 'rain_num_points',
                      'snow', 'snow_std_dev', 'snow_num_points']
    for result, name in zip(output, expected_names):
        assert result.var_name == name

    # The second dataset was offset by 10, which only shifts its mean.
    expected_values = [expect_mean, expect_stddev, expect_count,
                       expect_mean + 10, expect_stddev, expect_count]
    for result, expected in zip(output, expected_values):
        assert_arrays_almost_equal(result.data, expected)
def test_aggregating_list_of_datasets_over_two_dims_with_diff_masks(self):
    """
    Aggregate two datasets with different masks and check that each result keeps its own mask.

    The first dataset keeps the missing values it was created with; the second is masked everywhere, so its
    aggregated output is expected to be fully masked too.
    """
    grid = {'x': slice(-7.5, 7.5, 5), 'y': slice(-12.5, 12.5, 5)}

    var_0 = make_regular_2d_ungridded_data_with_missing_values()
    var_1 = make_regular_2d_ungridded_data_with_missing_values()
    # Mask out every point of the second dataset
    var_1.data.mask = 1

    datalist = UngriddedDataList([var_0, var_1])
    cube_out = datalist.aggregate(how=self.kernel, **grid)

    # Both expected results share the same underlying values and only differ in their masks
    expected_values = [[1.0, 2.0, 3.0],
                       [4.0, 5.0, 6.0],
                       [7.0, 8.0, 9.0],
                       [10.0, 11.0, 12.0],
                       [13.0, 14.0, 15.0]]
    result_0 = numpy.ma.array(expected_values,
                              mask=[[0, 0, 0],
                                    [0, 1, 0],
                                    [0, 0, 1],
                                    [0, 0, 0],
                                    [1, 0, 0]],
                              fill_value=float('nan'))
    result_1 = numpy.ma.array(expected_values,
                              mask=[[1, 1, 1],
                                    [1, 1, 1],
                                    [1, 1, 1],
                                    [1, 1, 1],
                                    [1, 1, 1]],
                              fill_value=float('nan'))

    assert len(cube_out) == 2
    compare_masked_arrays(cube_out[0].data, result_0)
    compare_masked_arrays(cube_out[1].data, result_1)
def setUp(self):
    """Create a two-element UngriddedDataList (rainfall and snowfall) sharing one latitude/longitude CoordList."""
    lat_values = np.arange(-10, 11, 5)
    lon_values = np.arange(-5, 6, 5)
    lon_grid, lat_grid = np.meshgrid(lon_values, lat_values)

    latitude = Coord(lat_grid, Metadata(name='lat', standard_name='latitude', units='degrees'))
    longitude = Coord(lon_grid, Metadata(name='lon', standard_name='longitude', units='degrees'))

    values = np.reshape(np.arange(15) + 1.0, (5, 3))
    self.coords = CoordList([latitude, longitude])

    rain_metadata = Metadata(standard_name='rainfall_flux',
                             long_name="TOTAL RAINFALL RATE: LS+CONV KG/M2/S",
                             units="kg m-2 s-1",
                             missing_value=-999)
    rain = UngriddedData(values, rain_metadata, self.coords)

    snow_metadata = Metadata(standard_name='snowfall_flux',
                             long_name="TOTAL SNOWFALL RATE: LS+CONV KG/M2/S",
                             units="kg m-2 s-1",
                             missing_value=-999)
    snow = UngriddedData(values * 0.1, snow_metadata, self.coords)

    self.ungridded_data_list = UngriddedDataList([rain, snow])
def collocate(self, points, data, constraint, kernel):
    """
    Collocate gridded data onto a set of sample points using the GriddedUngriddedInterpolator.

    When a list of data objects is supplied each element is collocated in turn and the results are gathered into
    a single list. The interpolator is created on first use and kept on this collocator for later calls. The
    output metadata is taken from the collocator's own overrides where set, otherwise from the input data.

    :param UngriddedData or UngriddedCoordinates points: Objects defining the sample points
    :param GriddedData or GriddedDataList data: Data to resample
    :param constraint: Must be None or a DummyConstraint; anything else is rejected
    :param kernel: The kernel handed on to the interpolator when it is created
    :return: An UngriddedDataList holding one UngriddedData object per input data object
    :raises ValueError: If a constraint other than a DummyConstraint is given
    """
    from cis.collocation.gridded_interpolation import GriddedUngriddedInterpolator

    log_memory_profile("GriddedUngriddedCollocator Initial")

    if isinstance(data, list):
        # A list is handled by collocating each member on its own and concatenating the results
        collected = UngriddedDataList()
        for item in data:
            collected.extend(self.collocate(points, item, constraint, kernel))
        return collected

    if not (constraint is None or isinstance(constraint, DummyConstraint)):
        raise ValueError("A constraint cannot be specified for the GriddedUngriddedCollocator")

    # Bring the sample points onto a single 360 degree longitude range...
    _fix_longitude_range(points.coords(), points)
    # ...and then move the data onto that same range
    _fix_longitude_range(points.coords(), data)

    log_memory_profile("GriddedUngriddedCollocator after data retrieval")

    logging.info("--> Collocating...")
    logging.info(" {} sample points".format(points.size))

    if self.interpolator is None:
        # Build the interpolator once and keep it for subsequent calls
        self.interpolator = GriddedUngriddedInterpolator(data, points, kernel,
                                                         self.missing_data_for_missing_sample)

    values = self.interpolator(data, fill_value=self.fill_value, extrapolate=self.extrapolate)

    log_memory_profile("GriddedUngriddedCollocator after running kernel on sample points")

    name = self.var_name or data.var_name
    long_name = self.var_long_name or data.long_name
    units = self.var_units or data.units
    metadata = Metadata(name, long_name=long_name, shape=values.shape,
                        missing_value=self.fill_value, units=units)
    set_standard_name_if_valid(metadata, data.standard_name)

    collocated = UngriddedData(values, metadata, points.coords())
    return_data = UngriddedDataList([collocated])

    log_memory_profile("GriddedUngriddedCollocator final")

    return return_data
def test_GIVEN_UngriddedDataList_WHEN_constrain_THEN_correctly_subsetted_UngriddedDataList_returned(self):
    """Subset a two-element list by longitude and latitude and check both members are subsetted."""
    rain = cis.test.util.mock.make_regular_2d_ungridded_data()

    # The second dataset shares the coordinates of the first, with every value one larger
    snow_metadata = Metadata(name='snow', standard_name='snowfall_flux',
                             long_name="TOTAL SNOWFALL RATE: LS+CONV KG/M2/S",
                             units="kg m-2 s-1", missing_value=-999)
    snow = UngriddedData(rain.data + 1, snow_metadata, rain.coords())

    subset = UngriddedDataList([rain, snow]).subset(longitude=[0.0, 5.0], latitude=[-5.0, 10.0])

    assert isinstance(subset, UngriddedDataList)
    expected_rain = [5, 6, 8, 9, 11, 12, 14, 15]
    expected_snow = [6, 7, 9, 10, 12, 13, 15, 16]
    assert subset[0].data.tolist() == expected_rain
    assert subset[1].data.tolist() == expected_snow
def test_combining(self):
    """Check that lists can be added and extended, and that adding a single item raises TypeError."""
    from cis.test.util.mock import make_regular_2d_ungridded_data

    another_list = UngriddedDataList([make_regular_2d_ungridded_data() for _ in range(2)])

    # Test adding
    summed = self.ungridded_data_list + another_list
    assert_that(isinstance(summed, UngriddedDataList))

    # Test extending
    another_list.extend(self.ungridded_data_list)
    assert_that(isinstance(another_list, UngriddedDataList))
    assert_that(len(another_list) == 4)

    # Test can't add single items
    with assert_raises(TypeError):
        self.ungridded_data_list + another_list[0]
def test_multiple_line(self):
    """Plot two time series from one list as lines and compare against the reference graphic."""
    from cis.test.util.mock import make_dummy_ungridded_data_time_series
    from cis.data_io.ungridded_data import UngriddedDataList

    series = UngriddedDataList([make_dummy_ungridded_data_time_series() for _ in range(2)])

    # Access the data of the first series before plotting
    _ = series[0].data
    # Offset and rename the second series
    series[1].data += 2.0
    series[1].metadata._name = 'snow'

    series.plot(how='line')

    self.check_graphic()
def mask_data(data, cad_score, extinction_qc, cad_confidence=20):
    """
    Select the good columns from each data object and mask out the non-aerosol / poorly retrieved points.

    Columns are kept only if they pass both find_good_aerosol_columns and find_good_extinction_columns. For data
    objects with an extended second dimension the aerosol mask is then applied, and for objects whose name starts
    with 'Extinction' the converged-extinction mask is applied as well. The number of valid points of each output
    is printed.

    NOTE(review): this docstring previously cited a default CAD confidence of 80 (from doi:10.1002/2013JD019527),
    but the signature default is 20 - confirm which value is intended.

    The extinction QC values are::

        Bit  Value  Interpretation
        1    0      unconstrained retrieval; initial lidar ratio unchanged during solution process
        1    1      constrained retrieval
        2    2      Initial lidar ratio reduced to prevent divergence of extinction solution
        3    4      Initial lidar ratio increased to reduce the number of negative extinction coefficients in the
                    derived solution
        4    8      Calculated backscatter coefficient exceeds the maximum allowable value
        5    16     Layer being analyzed has been identified by the feature finder as being totally attenuating
                    (i.e., opaque)
        6    32     Estimated optical depth error exceeds the maximum allowable value
        7    64     Solution converges, but with an unacceptably large number of negative values
        8    128    Retrieval terminated at maximum iterations
        9    256    No solution possible within allowable lidar ratio bounds
        16   32768  Fill value or no solution attempted

    :param CommonDataList data: The data to be masked
    :param cad_score: Data object whose ``.data`` holds the CAD scores
    :param extinction_qc: Data object whose ``.data`` holds the extinction QC flags
    :param cad_confidence: Confidence threshold passed on to the aerosol-finding helpers
    :return: An UngriddedDataList of the column-subsetted (and masked) data objects
    :raises ValueError: If the first dimension of a data object doesn't match the column mask
    """
    from cis.data_io.ungridded_data import UngriddedDataList

    # A column is kept only when it passes both the aerosol and the extinction column checks
    column_mask = find_good_aerosol_columns(cad_score, cad_confidence) & find_good_extinction_columns(extinction_qc)

    # Now do the full profiles. Pull out the valid parts of the aerosol and extinction masks
    good_extinctions = _find_converged_extinction_points(extinction_qc.data[column_mask])
    aerosols = _find_aerosol(cad_score.data[column_mask], cad_confidence)

    # First create the aerosol masked data (which is a shared mask)
    compressed_data = UngriddedDataList()
    for d in data:
        if d.data.shape[0] != column_mask.shape[0]:
            # This only outputs a warning in numpy currently
            raise ValueError("The data shape doesn't match the mask shape")
        c = d[column_mask]
        # If the data has (an extended) second dimension
        if len(c.shape) > 1 and c.shape[1] > 1:
            # Apply the aerosol (2D) mask
            c.data = apply_mask_to_numpy_array(c.data, ~aerosols)
            if c.name().startswith('Extinction'):
                # Apply the good extinction (2D) mask
                c.data = apply_mask_to_numpy_array(c.data, ~good_extinctions)
        compressed_data.append(c)
        print("Valid {} points: {}".format(c.name(), c.count()))

    return compressed_data
def constrain(self, data):
    """Subsets the supplied data.

    The combined coordinate mask and the shape indices are calculated on the first call and cached on this
    constraint, so later calls (including the recursive calls made for each member of a list) reuse them.
    This assumes every dataset passed to the same constraint shares the same coordinates.

    :param data: data to be subsetted (a single data object or a list of them)
    :return: subsetted data; an UngriddedDataList when a list was supplied, or None when a single data object
     has no points left after subsetting
    """
    import numpy as np
    from datetime import datetime
    from cis.data_io.ungridded_data import UngriddedDataList

    if isinstance(data, list):
        # Calculating masks and indices will only take place on the first iteration,
        # so we can just call this method recursively if we've got a list of data.
        output = UngriddedDataList()
        for var in data:
            output.append(self.constrain(var))
        return output

    _data = self._create_data_for_subset(data)

    # 'shape' is not a coordinate limit, so it has to be taken out of the limits before they are iterated
    # over below. Keeping the popped value only in a local would lose it after the first call, and every
    # later dataset (e.g. the remaining members of a list) would silently skip the shape subsetting - so it
    # is remembered on the instance instead.
    if 'shape' in self._limits:
        self._shape_limit = self._limits.pop('shape')
    _shape = getattr(self, '_shape_limit', None)

    if self._combined_mask is None:
        # Create the combined mask across all limits
        shape = _data.coords()[0].data.shape  # This assumes they are all the same shape
        combined_mask = np.ones(shape, dtype=bool)
        for coord, limit in self._limits.items():
            # Convert the points to datetimes if the limit is a datetime
            if isinstance(limit.start, datetime):
                points = _data.coord(coord).units.num2date(_data.coord(coord).data)
            else:
                points = _data.coord(coord).data

            # Select any points which are <= to the stop limit AND >= to the start limit
            mask = (np.less_equal(points, limit.stop) & np.greater_equal(points, limit.start))
            combined_mask &= mask
        self._combined_mask = combined_mask

    _data = _data[self._combined_mask]

    if _shape is not None:
        if self._shape_indices is None:
            # The region indices are computed once, on the already coordinate-subsetted data
            self._shape_indices = _get_ungridded_subset_region_indices(_data, _shape)
        _data = _data[np.unravel_index(self._shape_indices, _data.shape)]

    # An empty subset is reported as None rather than as an empty data object
    if _data.size == 0:
        _data = None

    return _data
def test_explicit_comparative_scatter(self):
    """Plot two datasets against each other by explicitly requesting a comparative scatter."""
    from cis.test.util.mock import make_regular_2d_ungridded_data
    from cis.data_io.ungridded_data import UngriddedDataList

    first = make_regular_2d_ungridded_data()
    second = make_regular_2d_ungridded_data(data_offset=2)
    pair = UngriddedDataList([first, second])

    # This is needed to setup the coord shapes unfortunately...
    # TODO: Fix this in the Coord somewhere
    _ = pair[0].data
    _ = pair[1].data
    pair[0].metadata._name = 'snow'

    pair.plot(how='comparativescatter')

    self.check_graphic()
def test_can_create_list_from_generators_and_other_iterators(self):
    """Check that an UngriddedDataList can be built from a tuple and from a chained iterator."""
    from cis.test.util.mock import make_regular_2d_ungridded_data
    import itertools

    # From a tuple
    from_tuple = UngriddedDataList((make_regular_2d_ungridded_data(),
                                    make_regular_2d_ungridded_data()))
    assert_that(len(from_tuple) == 2)

    # From an iterator chaining together the values of a dictionary of lists
    grouped = {1: [make_regular_2d_ungridded_data()],
               2: [make_regular_2d_ungridded_data()]}
    chained = itertools.chain.from_iterable(grouped.values())
    from_iterator = UngriddedDataList(chained)
    assert_that(len(from_iterator) == 2)
def test_layer_opts(self):
    """Plot two time series as lines, giving each layer its own colour and line style options."""
    from cis.test.util.mock import make_dummy_ungridded_data_time_series
    from cis.data_io.ungridded_data import UngriddedDataList

    series = UngriddedDataList([make_dummy_ungridded_data_time_series() for _ in range(2)])

    # Access the data of the first series before plotting
    _ = series[0].data
    # Offset and rename the second series
    series[1].data += 2.0
    series[1].metadata._name = 'snow'

    # One options dictionary per layer, in the same order as the data
    per_layer = [dict(c='yellow', itemstyle='dotted'),
                 dict(c='purple', itemstyle='dashed')]
    series.plot(how='line', layer_opts=per_layer)

    self.check_graphic()
def constrain(self, data):
    """Subsets the supplied data.

    The combined coordinate mask and the shape indices are calculated on the first call and cached on this
    constraint, so later calls (including the recursive calls made for each member of a list) reuse them.
    This assumes every dataset passed to the same constraint shares the same coordinates.

    :param data: data to be subsetted (a single data object or a list of them)
    :return: subsetted data; an UngriddedDataList when a list was supplied, or None when a single data object
     has no points left after subsetting
    """
    import numpy as np
    from datetime import datetime
    from cis.data_io.ungridded_data import UngriddedDataList

    if isinstance(data, list):
        # Calculating masks and indices will only take place on the first iteration,
        # so we can just call this method recursively if we've got a list of data.
        output = UngriddedDataList()
        for var in data:
            output.append(self.constrain(var))
        return output

    _data = self._create_data_for_subset(data)

    # 'shape' is not a coordinate limit, so it has to be taken out of the limits before they are iterated
    # over below. Keeping the popped value only in a local would lose it after the first call, and every
    # later dataset (e.g. the remaining members of a list) would silently skip the shape subsetting - so it
    # is remembered on the instance instead.
    if 'shape' in self._limits:
        self._shape_limit = self._limits.pop('shape')
    _shape = getattr(self, '_shape_limit', None)

    if self._combined_mask is None:
        # Create the combined mask across all limits
        shape = _data.coords()[0].data.shape  # This assumes they are all the same shape
        combined_mask = np.ones(shape, dtype=bool)
        for coord, limit in self._limits.items():
            # Convert the points to datetimes if the limit is a datetime
            if isinstance(limit.start, datetime):
                points = _data.coord(coord).units.num2date(_data.coord(coord).data)
            else:
                points = _data.coord(coord).data

            # Select any points which are <= to the stop limit AND >= to the start limit
            mask = (np.less_equal(points, limit.stop) & np.greater_equal(points, limit.start))
            combined_mask &= mask
        self._combined_mask = combined_mask

    _data = _data[self._combined_mask]

    if _shape is not None:
        if self._shape_indices is None:
            # The region indices are computed once, on the already coordinate-subsetted data
            self._shape_indices = _get_ungridded_subset_region_indices(_data, _shape)
        _data = _data[np.unravel_index(self._shape_indices, _data.shape)]

    # An empty subset is reported as None rather than as an empty data object
    if _data.size == 0:
        _data = None

    return _data
def read_data_list(self, filenames, variables, product=None, aliases=None):
    """
    Read several variables from a set of files into a single data list.

    Files can be either gridded or ungridded but not a mix of both: the type of list returned is decided by
    the first variable read.

    :param filenames: One or more filenames of the files to read
    :type filenames: string or list
    :param variables: One or more variables to read from the files (wildcards are expanded)
    :type variables: string or list
    :param str product: Name of data product to use (optional)
    :param aliases: List of variable aliases to put on each variables data object as an alternative means of
     identifying them. (Optional)
    :return: A list of the data read out (either a GriddedDataList or UngriddedDataList depending on the
     type of data contained in the files)
    :raises ValueError: If aliases are given but there are fewer of them than variables
    """
    # Single values are wrapped so that everything below can treat them as lists
    filenames = listify(filenames)
    variables = listify(variables)
    if aliases:
        aliases = listify(aliases)
    else:
        aliases = None

    variables = self._expand_wildcards(variables, filenames, product)

    data_list = None
    for position, variable in enumerate(variables):
        var_data = self._get_data_func(filenames, variable, product)
        var_data.filenames = filenames

        if aliases:
            try:
                var_data.alias = aliases[position]
            except IndexError:
                raise ValueError("Number of aliases does not match number of variables")

        if data_list is None:
            # The first variable read determines which kind of list is built
            if var_data.is_gridded:
                data_list = GriddedDataList()
            else:
                data_list = UngriddedDataList()
        data_list.append(var_data)

    assert data_list is not None
    return data_list
def test_GIVEN_UngriddedDataList_WHEN_constrain_THEN_correctly_subsetted_UngriddedDataList_returned(self):
    """Subset a two-element list by longitude and latitude and check both members are subsetted."""
    rain = cis.test.util.mock.make_regular_2d_ungridded_data()

    # The second dataset shares the coordinates of the first, with every value one larger
    snow_metadata = Metadata(name='snow', standard_name='snowfall_flux',
                             long_name="TOTAL SNOWFALL RATE: LS+CONV KG/M2/S",
                             units="kg m-2 s-1", missing_value=-999)
    snow = UngriddedData(rain.data + 1, snow_metadata, rain.coords())

    subset = UngriddedDataList([rain, snow]).subset(longitude=[0.0, 5.0], latitude=[-5.0, 10.0])

    assert isinstance(subset, UngriddedDataList)
    expected_rain = [5, 6, 8, 9, 11, 12, 14, 15]
    expected_snow = [6, 7, 9, 10, 12, 13, 15, 16]
    assert subset[0].data.tolist() == expected_rain
    assert subset[1].data.tolist() == expected_snow
def test_list_ungridded_ungridded_box_mean(self):
    """
    Collocate a two-element list onto identical sample points with the moments kernel.

    Six outputs are expected (mean, standard deviation and number of points for each input). Each sample is
    expected to match exactly one data point, so the means equal the inputs, the counts are all one and the
    standard deviations are fully masked.
    """
    rain = mock.make_regular_2d_ungridded_data()
    snow = mock.make_regular_2d_ungridded_data(data_offset=3)
    snow.long_name = 'TOTAL SNOWFALL RATE: LS+CONV KG/M2/S'
    snow.standard_name = 'snowfall_flux'
    snow.metadata._name = 'snow'
    data_list = UngriddedDataList([rain, snow])

    sample_points = mock.make_regular_2d_ungridded_data()
    constraint = SepConstraintKdtree('500km')
    kernel = moments()

    collocator = GeneralUngriddedCollocator()
    output = collocator.collocate(sample_points, data_list, constraint, kernel)

    expected_result = np.array(list(range(1, 16)))
    expected_n = np.array([1] * 15)

    assert len(output) == 6
    assert isinstance(output, UngriddedDataList)

    snow_names = {3: 'snow', 4: 'snow_std_dev', 5: 'snow_num_points'}
    for index in (3, 4, 5):
        assert output[index].var_name == snow_names[index]

    # First dataset: mean, std dev, count
    assert np.allclose(output[0].data, expected_result)
    assert all(output[1].data.mask)
    assert np.allclose(output[2].data, expected_n)
    # Second dataset: the same again, with the mean shifted by the data offset
    assert np.allclose(output[3].data, expected_result + 3)
    assert all(output[4].data.mask)
    assert np.allclose(output[5].data, expected_n)
def test_combining(self):
    """Check that lists can be added and extended, and that adding a single item raises TypeError."""
    from cis.test.util.mock import make_regular_2d_ungridded_data

    another_list = UngriddedDataList([make_regular_2d_ungridded_data() for _ in range(2)])

    # Test adding
    summed = self.ungridded_data_list + another_list
    assert_that(isinstance(summed, UngriddedDataList))

    # Test extending
    another_list.extend(self.ungridded_data_list)
    assert_that(isinstance(another_list, UngriddedDataList))
    assert_that(len(another_list) == 4)

    # Test can't add single items
    with assert_raises(TypeError):
        self.ungridded_data_list + another_list[0]
class TestUngriddedDataList(TestCase):
    """Tests for the unique coordinates and the data frame conversion of an UngriddedDataList."""

    def setUp(self):
        """Create a two-element list ('rain' and 'snow') sharing one latitude/longitude CoordList."""
        lat_values = np.arange(-10, 11, 5)
        lon_values = np.arange(-5, 6, 5)
        lon_grid, lat_grid = np.meshgrid(lon_values, lat_values)

        latitude = Coord(lat_grid, Metadata(name='lat', standard_name='latitude', units='degrees'))
        longitude = Coord(lon_grid, Metadata(name='lon', standard_name='longitude', units='degrees'))

        values = np.reshape(np.arange(15) + 1.0, (5, 3))
        self.coords = CoordList([latitude, longitude])

        rain_metadata = Metadata(standard_name='rain', long_name="TOTAL RAINFALL RATE: LS+CONV KG/M2/S",
                                 units="kg m-2 s-1", missing_value=-999)
        rain = UngriddedData(values, rain_metadata, self.coords)

        snow_metadata = Metadata(standard_name='snow', long_name="TOTAL SNOWFALL RATE: LS+CONV KG/M2/S",
                                 units="kg m-2 s-1", missing_value=-999)
        snow = UngriddedData(values * 0.1, snow_metadata, self.coords)

        self.ungridded_data_list = UngriddedDataList([rain, snow])

    def test_GIVEN_data_containing_multiple_matching_coordinates_WHEN_coords_THEN_only_unique_coords_returned(self):
        """Both datasets share their coordinates, so only two coordinates should be reported."""
        unique_coords = self.ungridded_data_list.coords()

        assert_that(len(unique_coords), is_(2))
        assert_that(isinstance(unique_coords, CoordList))

        coord_names = []
        for coord in unique_coords:
            coord_names.append(coord.standard_name)
        assert_that(coord_names, contains_inanyorder('latitude', 'longitude'))

    @skip_pandas
    def test_GIVEN_multiple_ungridded_data_WHEN_call_as_data_frame_THEN_returns_valid_data_frame(self):
        """The data frame should have one column per dataset and per coordinate."""
        frame = self.ungridded_data_list.as_data_frame()

        assert_that(frame['rain'][5] == 6)
        assert_almost_equal(frame['snow'][5], 0.6)
        assert_that(frame['lat'][13] == 10)
        assert_that(frame['lon'][0] == -5)

    @skip_pandas
    def test_GIVEN_multiple_ungridded_data_with_missing_data_WHEN_call_as_data_frame_THEN_returns_valid_data_frame(self):
        """A masked value in one of the datasets should appear as NaN in the data frame."""
        raw = np.reshape(np.arange(15) + 10.0, (5, 3))
        # Mask a single point of the extra dataset
        hail_values = np.ma.masked_array(raw, np.zeros(raw.shape, dtype=bool))
        hail_values.mask[1, 2] = True

        hail_metadata = Metadata(standard_name='hail', long_name="TOTAL HAIL RATE: LS+CONV KG/M2/S",
                                 units="kg m-2 s-1", missing_value=-999)
        hail = UngriddedData(hail_values, hail_metadata, self.coords)
        self.ungridded_data_list.append(hail)

        frame = self.ungridded_data_list.as_data_frame()

        assert_that(frame['rain'][5] == 6)
        assert_almost_equal(frame['snow'][5], 0.6)
        assert_that(frame['lat'][13] == 10)
        assert_that(frame['lon'][0] == -5)
        assert_almost_equal(frame['hail'][1], 11.0)

        # The masked point is addressed by its position in the flattened (5, 3) array
        masked_position = np.ravel_multi_index([1, 2], (5, 3))
        assert_that(np.isnan(frame['hail'][masked_position]))

        self.ungridded_data_list.pop()
def test_GIVEN_grids_contain_multiple_matching_coordinates_WHEN_coords_THEN_only_unique_coords_returned(self):
    """Two datasets built on the same CoordList should report only the two shared coordinates."""
    lat_values = np.arange(-10, 11, 5)
    lon_values = np.arange(-5, 6, 5)
    lon_grid, lat_grid = np.meshgrid(lon_values, lat_values)

    latitude = Coord(lat_grid, Metadata(name='lat', standard_name='latitude', units='degrees'))
    longitude = Coord(lon_grid, Metadata(name='lon', standard_name='longitude', units='degrees'))

    values = np.reshape(np.arange(15) + 1.0, (5, 3))
    shared_coords = CoordList([latitude, longitude])

    rain_metadata = Metadata(standard_name='rain', long_name="TOTAL RAINFALL RATE: LS+CONV KG/M2/S",
                             units="kg m-2 s-1", missing_value=-999)
    rain = UngriddedData(values, rain_metadata, shared_coords)

    snow_metadata = Metadata(standard_name='snow', long_name="TOTAL SNOWFALL RATE: LS+CONV KG/M2/S",
                             units="kg m-2 s-1", missing_value=-999)
    snow = UngriddedData(values * 0.1, snow_metadata, shared_coords)

    unique_coords = UngriddedDataList([rain, snow]).coords()

    assert_that(len(unique_coords), is_(2))
    assert_that(isinstance(unique_coords, CoordList))

    coord_names = []
    for coord in unique_coords:
        coord_names.append(coord.standard_name)
    assert_that(coord_names, contains_inanyorder('latitude', 'longitude'))
def test_implicit_comparative_scatter(self):
    """Plot one dataset with another given as its x axis and compare against the reference graphic."""
    from cis.test.util.mock import make_regular_2d_ungridded_data
    from cis.data_io.ungridded_data import UngriddedDataList

    first = make_regular_2d_ungridded_data()
    second = make_regular_2d_ungridded_data(data_offset=2)
    pair = UngriddedDataList([first, second])

    # This is needed to setup the coord shapes unfortunately...
    _ = pair[0].data
    _ = pair[1].data
    pair[0].metadata._name = 'snow'

    # Passing a data object as the x axis is what makes this a comparative plot
    pair[1].plot(xaxis=pair[0])

    self.check_graphic()
def test_aggregating_list_of_datasets_over_two_dims(self):
    """Aggregate two identical datasets onto the same grid and check both give the same masked result."""
    grid = {'x': slice(-7.5, 7.5, 5), 'y': slice(-12.5, 12.5, 5)}

    members = [make_regular_2d_ungridded_data_with_missing_values() for _ in range(2)]
    cube_out = UngriddedDataList(members).aggregate(how=self.kernel, **grid)

    expected_values = [[1.0, 2.0, 3.0],
                       [4.0, 5.0, 6.0],
                       [7.0, 8.0, 9.0],
                       [10.0, 11.0, 12.0],
                       [13.0, 14.0, 15.0]]
    expected_mask = [[0, 0, 0],
                     [0, 1, 0],
                     [0, 0, 1],
                     [0, 0, 0],
                     [1, 0, 0]]
    result = numpy.ma.array(expected_values, mask=expected_mask, fill_value=float('nan'))

    assert len(cube_out) == 2
    for index in range(2):
        compare_masked_arrays(cube_out[index].data, result)
def create_data_object(self, filenames, variable):
    """
    Create the data object(s) for one variable read from the given files.

    A two dimensional variable gives a single UngriddedData object. A variable with more than two dimensions
    gives an UngriddedDataList with one UngriddedData per combination of indices over all but the last two
    dimensions.

    :param filenames: The files to read the variable and its coordinates from
    :param variable: The name of the variable to read
    :return: An UngriddedData object, or an UngriddedDataList for variables with more than two dimensions
    :raises NotImplementedError: If the variable has fewer than two dimensions
    """
    from itertools import product

    logging.debug("Creating data object for variable " + variable)

    # reading coordinates
    # the variable here is needed to work out whether to apply interpolation to the lat/lon data or not
    coords = self._create_coord_list(filenames, variable)

    # reading of variables
    sdata, vdata = hdf.read(filenames, variable)

    # retrieve data + its metadata
    var = sdata[variable]
    metadata = hdf.read_metadata(var, "SD")

    # Check the dimension of this variable (taken from the first element of var only)
    _, ndim, dim_len, _, _ = var[0].info()
    if ndim == 2:
        return UngriddedData(var, metadata, coords, _get_MODIS_SDS_data)
    elif ndim < 2:
        raise NotImplementedError("1D field in MODIS L2 data.")
    else:
        result = UngriddedDataList()

        # Iterate over all but the last two dimensions
        ranges = [range(n) for n in dim_len[:-2]]
        for indices in product(*ranges):
            for manager in var:
                # Select a single index along each leading dimension and the full extent of the last two
                manager._start = list(indices) + [0, 0]
                manager._count = [1] * len(indices) + manager.info()[2][-2:]
            # NOTE(review): every UngriddedData appended here is given the same `var` managers, whose
            # _start/_count are overwritten on each iteration - confirm the data is not read lazily
            # after the loop has moved on.
            result.append(UngriddedData(var, metadata, coords.copy(), _get_MODIS_SDS_data))
        return result
def setUp(self):
    """Create a two-element UngriddedDataList (rainfall and snowfall) sharing one latitude/longitude CoordList."""
    lat_values = np.arange(-10, 11, 5)
    lon_values = np.arange(-5, 6, 5)
    lon_grid, lat_grid = np.meshgrid(lon_values, lat_values)

    latitude = Coord(lat_grid, Metadata(name='lat', standard_name='latitude', units='degrees'))
    longitude = Coord(lon_grid, Metadata(name='lon', standard_name='longitude', units='degrees'))

    values = np.reshape(np.arange(15) + 1.0, (5, 3))
    self.coords = CoordList([latitude, longitude])

    rain_metadata = Metadata(standard_name='rainfall_flux',
                             long_name="TOTAL RAINFALL RATE: LS+CONV KG/M2/S",
                             units="kg m-2 s-1",
                             missing_value=-999)
    rain = UngriddedData(values, rain_metadata, self.coords)

    snow_metadata = Metadata(standard_name='snowfall_flux',
                             long_name="TOTAL SNOWFALL RATE: LS+CONV KG/M2/S",
                             units="kg m-2 s-1",
                             missing_value=-999)
    snow = UngriddedData(values * 0.1, snow_metadata, self.coords)

    self.ungridded_data_list = UngriddedDataList([rain, snow])
def test_list_of_data(self):
    """Collocating a list with the DummyCollocator should give back the data of each member unchanged."""
    sample = mock.make_regular_2d_ungridded_data()
    data = UngriddedDataList([mock.make_regular_2d_ungridded_data(data_offset=offset)
                              for offset in (5, 10)])

    # Neither a constraint nor a kernel is supplied
    output = DummyCollocator().collocate(sample, data, None, None)

    assert len(output) == 2
    for index in range(2):
        assert numpy.array_equal(output[index].data, data[index].data)
class TestUngriddedDataList(TestCase):
    """Tests for the list behaviour, string form, coordinates and data frame conversion of UngriddedDataList."""

    def setUp(self):
        """Create a two-element list (rainfall and snowfall) sharing one latitude/longitude CoordList."""
        lat_values = np.arange(-10, 11, 5)
        lon_values = np.arange(-5, 6, 5)
        lon_grid, lat_grid = np.meshgrid(lon_values, lat_values)

        latitude = Coord(lat_grid, Metadata(name='lat', standard_name='latitude', units='degrees'))
        longitude = Coord(lon_grid, Metadata(name='lon', standard_name='longitude', units='degrees'))

        values = np.reshape(np.arange(15) + 1.0, (5, 3))
        self.coords = CoordList([latitude, longitude])

        rain_metadata = Metadata(standard_name='rainfall_flux',
                                 long_name="TOTAL RAINFALL RATE: LS+CONV KG/M2/S",
                                 units="kg m-2 s-1",
                                 missing_value=-999)
        rain = UngriddedData(values, rain_metadata, self.coords)

        snow_metadata = Metadata(standard_name='snowfall_flux',
                                 long_name="TOTAL SNOWFALL RATE: LS+CONV KG/M2/S",
                                 units="kg m-2 s-1",
                                 missing_value=-999)
        snow = UngriddedData(values * 0.1, snow_metadata, self.coords)

        self.ungridded_data_list = UngriddedDataList([rain, snow])

    def test_slicing(self):
        """An integer index should give a single item, a slice should give a list."""
        single_item = self.ungridded_data_list[1]
        assert_that(isinstance(single_item, UngriddedData))

        first_only = self.ungridded_data_list[0:1]
        assert_that(isinstance(first_only, UngriddedDataList))

        everything = self.ungridded_data_list[0:]
        assert_that(isinstance(everything, UngriddedDataList))

    def test_combining(self):
        """Check that lists can be added and extended, and that adding a single item raises TypeError."""
        from cis.test.util.mock import make_regular_2d_ungridded_data

        another_list = UngriddedDataList([make_regular_2d_ungridded_data() for _ in range(2)])

        # Test adding
        summed = self.ungridded_data_list + another_list
        assert_that(isinstance(summed, UngriddedDataList))

        # Test extending
        another_list.extend(self.ungridded_data_list)
        assert_that(isinstance(another_list, UngriddedDataList))
        assert_that(len(another_list) == 4)

        # Test can't add single items
        with assert_raises(TypeError):
            self.ungridded_data_list + another_list[0]

    def test_can_get_string_of_list(self):
        """The string form should list each dataset followed by the shared coordinates."""
        expected = ("UngriddedDataList: \n0: Ungridded data: rainfall_flux / (kg m-2 s-1) \n"
                    "1: Ungridded data: snowfall_flux / (kg m-2 s-1) \nCoordinates: \n latitude\n longitude\n")
        s = str(self.ungridded_data_list)
        assert_that(s == expected)

    def test_GIVEN_data_containing_multiple_matching_coordinates_WHEN_coords_THEN_only_unique_coords_returned(self):
        """Both datasets share their coordinates, so only two coordinates should be reported."""
        unique_coords = self.ungridded_data_list.coords()

        assert_that(len(unique_coords), is_(2))
        assert_that(isinstance(unique_coords, CoordList))

        coord_names = []
        for coord in unique_coords:
            coord_names.append(coord.standard_name)
        assert_that(coord_names, contains_inanyorder('latitude', 'longitude'))

    def test_can_create_list_from_generators_and_other_iterators(self):
        """Check that an UngriddedDataList can be built from a tuple and from a chained iterator."""
        from cis.test.util.mock import make_regular_2d_ungridded_data
        import itertools

        # From a tuple
        from_tuple = UngriddedDataList((make_regular_2d_ungridded_data(),
                                        make_regular_2d_ungridded_data()))
        assert_that(len(from_tuple) == 2)

        # From an iterator chaining together the values of a dictionary of lists
        grouped = {1: [make_regular_2d_ungridded_data()],
                   2: [make_regular_2d_ungridded_data()]}
        chained = itertools.chain.from_iterable(grouped.values())
        from_iterator = UngriddedDataList(chained)
        assert_that(len(from_iterator) == 2)

    @skip_pandas
    def test_GIVEN_multiple_ungridded_data_WHEN_call_as_data_frame_THEN_returns_valid_data_frame(self):
        """The data frame should have one column per dataset and per coordinate."""
        frame = self.ungridded_data_list.as_data_frame()

        assert_that(frame['rainfall_flux'][5] == 6)
        assert_almost_equal(frame['snowfall_flux'][5], 0.6)
        assert_that(frame['latitude'][13] == 10)
        assert_that(frame['longitude'][0] == -5)

    @skip_pandas
    def test_GIVEN_multiple_ungridded_data_with_missing_data_WHEN_call_as_data_frame_THEN_returns_valid_data_frame(self):
        """A masked value in one of the datasets should appear as NaN in the data frame."""
        raw = np.reshape(np.arange(15) + 10.0, (5, 3))
        # Mask a single point of the extra dataset
        hail_values = np.ma.masked_array(raw, np.zeros(raw.shape, dtype=bool))
        hail_values.mask[1, 2] = True

        hail_metadata = Metadata(name='hail',
                                 long_name="TOTAL HAIL RATE: LS+CONV KG/M2/S",
                                 units="kg m-2 s-1",
                                 missing_value=-999)
        hail = UngriddedData(hail_values, hail_metadata, self.coords)
        self.ungridded_data_list.append(hail)

        frame = self.ungridded_data_list.as_data_frame()

        assert_that(frame['rainfall_flux'][5] == 6)
        assert_almost_equal(frame['snowfall_flux'][5], 0.6)
        assert_that(frame['latitude'][13] == 10)
        assert_that(frame['longitude'][0] == -5)
        # The hail column is looked up by its long name in these assertions
        assert_almost_equal(frame['TOTAL HAIL RATE: LS+CONV KG/M2/S'][1], 11.0)

        # The masked point is addressed by its position in the flattened (5, 3) array
        masked_position = np.ravel_multi_index([1, 2], (5, 3))
        assert_that(np.isnan(frame['TOTAL HAIL RATE: LS+CONV KG/M2/S'][masked_position]))

        self.ungridded_data_list.pop()
def collocate(self, points, data, constraint, kernel):
    """
    Collocate ungridded data onto a set of sample points using the given constraint and kernel.

    When a list of data objects is supplied each element is collocated in turn and the results are gathered into
    a single list. The kernel determines how many output variables are produced for each input (one per entry
    returned by its get_variable_details method). The name, long name, standard name and units of the input
    data are stored on this collocator as a side effect.

    :param UngriddedData or UngriddedCoordinates points: Object defining the sample points
    :param UngriddedData data: The source data to collocate from (or a list of such objects)
    :param constraint: An instance of a Constraint subclass which takes a data object and returns a subset of that
     data based on it's internal parameters
    :param kernel: An instance of a Kernel subclass which takes a number of points and returns a single value
    :return: An UngriddedDataList with one UngriddedData object per output variable
    :raises NotImplementedError: If the kernel raises a CoordinateMultiDimError
    """
    log_memory_profile("GeneralUngriddedCollocator Initial")

    if isinstance(data, list):
        # A list is handled by collocating each member on its own and concatenating the results
        collected = UngriddedDataList()
        for item in data:
            collected.extend(self.collocate(points, item, constraint, kernel))
        return collected

    # Bring the sample points onto a single 360 degree longitude range...
    _fix_longitude_range(points.coords(), points)
    # ...and then move the data points onto that same range
    _fix_longitude_range(points.coords(), data)

    # Work on data frames from here on; rows of the data with missing values are dropped
    sample_points = points.as_data_frame(time_index=False, name='vals')
    data_frame = data.as_data_frame(time_index=False, name='vals')
    data_points = data_frame.dropna(axis=0)

    log_memory_profile("GeneralUngriddedCollocator after data retrieval")

    # Create index if constraint and/or kernel require one (no coordinate map is supplied)
    data_index.create_indexes(constraint, points, data_points, None)

    log_memory_profile("GeneralUngriddedCollocator after indexing")

    logging.info("--> Collocating...")

    # Remember the details of the input variable and ask the kernel which outputs it will produce
    self.var_name = data.var_name
    self.var_long_name = data.long_name
    self.var_standard_name = data.standard_name
    self.var_units = data.units
    var_set_details = kernel.get_variable_details(self.var_name, self.var_long_name,
                                                  self.var_standard_name, self.var_units)

    sample_points_count = len(sample_points)
    # One row per output variable, one column per sample point. Everything starts off masked and is
    # unmasked by assignment.
    values = np.ma.masked_all((len(var_set_details), sample_points_count))
    values.fill_value = self.fill_value

    log_memory_profile("GeneralUngriddedCollocator after output array creation")

    logging.info(" {} sample points".format(sample_points_count))

    if isinstance(kernel, nn_horizontal_only):
        # Only find the nearest point using the kd-tree, without constraint in other dimensions
        nearest_indices = constraint.haversine_distance_kd_tree_index.find_nearest_point(sample_points)
        nearest_points = data_points.iloc[nearest_indices]
        values[0, :] = nearest_points.vals.values
    else:
        iterator = constraint.get_iterator(self.missing_data_for_missing_sample, None, None, data_points,
                                           None, sample_points, None)
        for i, point, con_points in iterator:
            try:
                # Kernel returns either a single value or a tuple of values to insert into each output
                # variable.
                values[:, i] = kernel.get_value(point, con_points)
            except CoordinateMultiDimError as e:
                raise NotImplementedError(e)
            except ValueError:
                # Deliberately ignored: the values for this sample point are simply left masked
                pass

    log_memory_profile("GeneralUngriddedCollocator after running kernel on sample points")

    # Mask any bad values
    values = np.ma.masked_invalid(values)

    return_data = UngriddedDataList()
    for row, details in enumerate(var_set_details):
        # Each details entry is indexed as (name, long name, standard name, units)
        var_metadata = Metadata(name=details[0], long_name=details[1], shape=(len(sample_points),),
                                missing_value=self.fill_value, units=details[3])
        set_standard_name_if_valid(var_metadata, details[2])
        return_data.append(UngriddedData(values[row, :], var_metadata, points.coords()))

    log_memory_profile("GeneralUngriddedCollocator final")

    return return_data
class TestUngriddedDataList(TestCase):
    """Tests for UngriddedDataList using two flux variables that share one pair of coordinates."""

    def setUp(self):
        """Create a rainfall/snowfall UngriddedDataList on a shared 5x3 latitude/longitude mesh."""
        lat_axis = np.arange(-10, 11, 5)
        lon_axis = np.arange(-5, 6, 5)
        lon_mesh, lat_mesh = np.meshgrid(lon_axis, lat_axis)

        latitude = Coord(lat_mesh, Metadata(name='lat', standard_name='latitude', units='degrees'))
        longitude = Coord(lon_mesh, Metadata(name='lon', standard_name='longitude', units='degrees'))
        # Values 1.0 .. 15.0 laid out on the same 5x3 shape as the coordinate meshes.
        base_values = (np.arange(15) + 1.0).reshape((5, 3))
        self.coords = CoordList([latitude, longitude])

        rain_metadata = Metadata(standard_name='rainfall_flux',
                                 long_name="TOTAL RAINFALL RATE: LS+CONV KG/M2/S",
                                 units="kg m-2 s-1",
                                 missing_value=-999)
        rainfall = UngriddedData(base_values, rain_metadata, self.coords)

        snow_metadata = Metadata(standard_name='snowfall_flux',
                                 long_name="TOTAL SNOWFALL RATE: LS+CONV KG/M2/S",
                                 units="kg m-2 s-1",
                                 missing_value=-999)
        snowfall = UngriddedData(base_values * 0.1, snow_metadata, self.coords)

        self.ungridded_data_list = UngriddedDataList([rainfall, snowfall])

    def test_slicing(self):
        """Integer indexing gives a single UngriddedData; slicing gives an UngriddedDataList."""
        element = self.ungridded_data_list[1]
        assert_that(isinstance(element, UngriddedData))

        bounded = self.ungridded_data_list[0:1]
        assert_that(isinstance(bounded, UngriddedDataList))

        open_ended = self.ungridded_data_list[0:]
        assert_that(isinstance(open_ended, UngriddedDataList))

    def test_combining(self):
        """Adding and extending lists keeps the list type; adding a single item raises TypeError."""
        from cis.test.util.mock import make_regular_2d_ungridded_data

        other = UngriddedDataList([make_regular_2d_ungridded_data() for _ in range(2)])

        # Test adding
        summed = self.ungridded_data_list + other
        assert_that(isinstance(summed, UngriddedDataList))

        # Test extending
        other.extend(self.ungridded_data_list)
        assert_that(isinstance(other, UngriddedDataList))
        assert_that(len(other) == 4)

        # Test can't add single items
        with assert_raises(TypeError):
            self.ungridded_data_list + other[0]

    def test_can_get_string_of_list(self):
        """str() of the list enumerates each variable followed by the shared coordinates."""
        rendered = str(self.ungridded_data_list)
        expected = ("UngriddedDataList: \n0: Ungridded data: rainfall_flux / (kg m-2 s-1) \n"
                    "1: Ungridded data: snowfall_flux / (kg m-2 s-1) \nCoordinates: \n latitude\n longitude\n")
        assert_that(rendered == expected)

    def test_GIVEN_data_containing_multiple_matching_coordinates_WHEN_coords_THEN_only_unique_coords_returned(self):
        """coords() on the list returns each shared coordinate once, as a CoordList."""
        found = self.ungridded_data_list.coords()
        assert_that(len(found), is_(2))
        assert_that(isinstance(found, CoordList))

        standard_names = []
        for coord in found:
            standard_names.append(coord.standard_name)
        assert_that(standard_names, contains_inanyorder('latitude', 'longitude'))

    def test_can_create_list_from_generators_and_other_iterators(self):
        """The list constructor accepts a tuple and an itertools.chain iterator as its source."""
        from cis.test.util.mock import make_regular_2d_ungridded_data
        import itertools

        from_tuple = UngriddedDataList((make_regular_2d_ungridded_data(), make_regular_2d_ungridded_data()))
        assert_that(len(from_tuple) == 2)

        grouped = {1: [make_regular_2d_ungridded_data()], 2: [make_regular_2d_ungridded_data()]}
        flattened = itertools.chain.from_iterable(grouped.values())
        from_chain = UngriddedDataList(flattened)
        assert_that(len(from_chain) == 2)

    @skip_pandas
    def test_GIVEN_multiple_ungridded_data_WHEN_call_as_data_frame_THEN_returns_valid_data_frame(self):
        """as_data_frame() exposes a column per variable and per coordinate, indexed by flattened position."""
        frame = self.ungridded_data_list.as_data_frame()

        assert_that(frame['rainfall_flux'][5] == 6)
        assert_almost_equal(frame['snowfall_flux'][5], 0.6)
        assert_that(frame['latitude'][13] == 10)
        assert_that(frame['longitude'][0] == -5)

    @skip_pandas
    def test_GIVEN_multiple_ungridded_data_with_missing_data_WHEN_call_as_data_frame_THEN_returns_valid_data_frame(self):
        """A masked element in an added variable appears as NaN in the resulting data frame."""
        raw = np.reshape(np.arange(15) + 10.0, (5, 3))
        hail_values = np.ma.masked_array(raw, np.zeros(raw.shape, dtype=bool))
        hail_values.mask[1, 2] = True

        hail_metadata = Metadata(name='hail',
                                 long_name="TOTAL HAIL RATE: LS+CONV KG/M2/S",
                                 units="kg m-2 s-1",
                                 missing_value=-999)
        hail = UngriddedData(hail_values, hail_metadata, self.coords)
        self.ungridded_data_list.append(hail)

        frame = self.ungridded_data_list.as_data_frame()

        assert_that(frame['rainfall_flux'][5] == 6)
        assert_almost_equal(frame['snowfall_flux'][5], 0.6)
        assert_that(frame['latitude'][13] == 10)
        assert_that(frame['longitude'][0] == -5)
        assert_almost_equal(frame['TOTAL HAIL RATE: LS+CONV KG/M2/S'][1], 11.0)

        # Flattened position of the masked (row 1, column 2) element in the 5x3 array.
        masked_position = np.ravel_multi_index([1, 2], (5, 3))
        assert_that(np.isnan(frame['TOTAL HAIL RATE: LS+CONV KG/M2/S'][masked_position]))

        self.ungridded_data_list.pop()
def collocate(self, points, data, constraint, kernel):
    """
    Collocate ``data`` onto the sample ``points`` using the given constraint and kernel.

    For each sample point yielded by ``enumerate_non_masked_points`` the constraint (when one is given)
    selects the candidate data points and the kernel reduces them to one value per output variable. Output
    values start as ``self.fill_value``; entries stay at the fill value when the kernel raises ValueError for
    that point, or when a kernel returning a tuple yields NaN for that variable.

    As a side effect ``self.var_name``, ``self.var_long_name``, ``self.var_standard_name`` and
    ``self.var_units`` are set from the input data.

    :param points: UngriddedData or UngriddedCoordinates defining the sample points
    :param data: An UngriddedData object or Cube, or any other object containing metadata that the constraint
        object can read. May also be a list of objects, in which case each is collocated in turn and the
        results are concatenated
    :param constraint: An instance of a Constraint subclass which takes a data object and returns a subset of
        that data based on its internal parameters
    :param kernel: An instance of a Kernel subclass which takes a number of points and returns a single value
        (or a tuple of values, one per output variable)
    :return: An UngriddedDataList holding one UngriddedData per entry of ``kernel.get_variable_details(...)``
    :raises ValueError: if an nn_gridded or li kernel is used with data that is not an iris Cube, or together
        with a constraint other than a DummyConstraint
    :raises NotImplementedError: if the kernel raises CoordinateMultiDimError
    """
    log_memory_profile("GeneralUngriddedCollocator Initial")

    if isinstance(data, list):
        # Indexing and constraints (for SepConstraintKdTree) will only take place on the first iteration,
        # so we really can just call this method recursively if we've got a list of data.
        output = UngriddedDataList()
        for var in data:
            output.extend(self.collocate(points, var, constraint, kernel))
        return output

    metadata = data.metadata
    sample_points = points.get_all_points()

    # Convert ungridded data to a list of points if kernel needs it.
    # Special case checks for kernels that use a cube - this could be done more elegantly.
    if isinstance(kernel, (nn_gridded, li)):
        if hasattr(kernel, "interpolator"):
            # If we have an interpolator on the kernel we need to reset it as it depends on the actual values
            # as well as the coordinates
            kernel.interpolator = None
            kernel.coord_names = []
        if not isinstance(data, iris.cube.Cube):
            raise ValueError("Ungridded data cannot be used with kernel nn_gridded or li")
        if constraint is not None and not isinstance(constraint, DummyConstraint):
            raise ValueError("A constraint cannot be specified with kernel nn_gridded or li")
        data_points = data
    else:
        data_points = data.get_non_masked_points()

    # First fix the sample points so that they all fall within the same 360 degree longitude range
    _fix_longitude_range(points.coords(), sample_points)
    # Then fix the data points so that they fall onto the same 360 degree longitude range as the sample points
    _fix_longitude_range(points.coords(), data_points)

    log_memory_profile("GeneralUngriddedCollocator after data retrieval")

    # Create index if constraint and/or kernel require one.
    coord_map = None
    data_index.create_indexes(constraint, points, data_points, coord_map)
    data_index.create_indexes(kernel, points, data_points, coord_map)

    log_memory_profile("GeneralUngriddedCollocator after indexing")

    logging.info("--> Collocating...")

    # Create output arrays.
    self.var_name = data.name()
    self.var_long_name = metadata.long_name
    self.var_standard_name = metadata.standard_name
    self.var_units = data.units
    # Each entry is indexed below as (name, long_name, standard_name, units), matching the argument order here.
    var_set_details = kernel.get_variable_details(self.var_name, self.var_long_name,
                                                  self.var_standard_name, self.var_units)

    sample_points_count = len(sample_points)
    # One row per output variable, pre-filled with the fill value so untouched entries read as missing.
    values = np.zeros((len(var_set_details), sample_points_count)) + self.fill_value

    log_memory_profile("GeneralUngriddedCollocator after output array creation")

    logging.info("    {} sample points".format(sample_points_count))

    # Apply constraint and/or kernel to each sample point.
    cell_count = 0
    total_count = 0
    for i, point in sample_points.enumerate_non_masked_points():
        # Log progress periodically.
        cell_count += 1
        if cell_count == 1000:
            total_count += cell_count
            cell_count = 0
            logging.info("    Processed {} points of {}".format(total_count, sample_points_count))

        if constraint is None:
            con_points = data_points
        else:
            con_points = constraint.constrain_points(point, data_points)

        try:
            value_obj = kernel.get_value(point, con_points)
            # Kernel returns either a single value or a tuple of values to insert into each output variable.
            if isinstance(value_obj, tuple):
                for idx, val in enumerate(value_obj):
                    # NaN components are skipped so that entry keeps the fill value.
                    if not np.isnan(val):
                        values[idx, i] = val
            else:
                values[0, i] = value_obj
        except CoordinateMultiDimError as e:
            raise NotImplementedError(e)
        except ValueError:
            # Deliberate best-effort: leave the fill value in place for this sample point.
            # NOTE(review): presumably kernels raise ValueError when they have no usable points - confirm.
            pass

    log_memory_profile("GeneralUngriddedCollocator after running kernel on sample points")

    return_data = UngriddedDataList()
    for idx, var_details in enumerate(var_set_details):
        if idx == 0:
            # The first output variable is built around the input's metadata object, which is then updated.
            # NOTE(review): if UngriddedData keeps a reference to the metadata it is given, these assignments
            # also alter the input data's metadata - confirm this is intended.
            new_data = UngriddedData(values[0, :], metadata, points.coords())
            new_data.metadata._name = var_details[0]
            new_data.metadata.long_name = var_details[1]
            cis.utils.set_cube_standard_name_if_valid(new_data, var_details[2])
            new_data.metadata.shape = (sample_points_count,)
            new_data.metadata.missing_value = self.fill_value
            # Units live at index 3; index 2 is the standard name used just above.
            new_data.units = var_details[3]
        else:
            var_metadata = Metadata(name=var_details[0], long_name=var_details[1],
                                    shape=(sample_points_count,),
                                    missing_value=self.fill_value, units=var_details[3])
            new_data = UngriddedData(values[idx, :], var_metadata, points.coords())
        return_data.append(new_data)

    log_memory_profile("GeneralUngriddedCollocator final")

    return return_data