def test_complete_collapse_one_dim_using_moments_kernel(self):
    self.kernel = aggregation_kernels['moments']
    data1 = make_from_cube(make_5x3_lon_lat_2d_cube_with_missing_data())
    data1.var_name = 'var1'
    data2 = make_from_cube(make_5x3_lon_lat_2d_cube_with_missing_data())
    data2.var_name = 'var2'
    data2.data += 10
    data = GriddedDataList([data1, data2])

    output = data.collapsed(['x'], how=self.kernel)

    expect_mean = numpy.array([[5.5, 8.75, 9]])
    expect_stddev = numpy.array([numpy.sqrt(15), numpy.sqrt(26.25), numpy.sqrt(30)])
    expect_count = numpy.array([[4, 4, 4]])

    assert isinstance(output, GriddedDataList)
    assert len(output) == 6
    mean_1, stddev_1, count_1, mean_2, stddev_2, count_2 = output
    assert mean_1.var_name == 'var1'
    assert stddev_1.var_name == 'var1_std_dev'
    assert count_1.var_name == 'var1_num_points'
    assert mean_2.var_name == 'var2'
    assert stddev_2.var_name == 'var2_std_dev'
    assert count_2.var_name == 'var2_num_points'
    assert_arrays_almost_equal(mean_1.data, expect_mean)
    assert_arrays_almost_equal(mean_2.data, expect_mean + 10)
    assert_arrays_almost_equal(stddev_1.data, expect_stddev)
    assert_arrays_almost_equal(stddev_2.data, expect_stddev)
    assert_arrays_almost_equal(count_1.data, expect_count)
    assert_arrays_almost_equal(count_2.data, expect_count)
def test_partial_aggregation_over_more_than_one_dim_on_multidimensional_coord(self):
    from cis.data_io.gridded_data import GriddedDataList, make_from_cube
    data1 = make_from_cube(make_mock_cube(time_dim_length=7, hybrid_pr_len=5))
    data2 = make_from_cube(make_mock_cube(time_dim_length=7, hybrid_pr_len=5, data_offset=1))
    datalist = GriddedDataList([data1, data2])

    cube_out = datalist.collapsed(['t', 'x'], how=self.kernel)

    result_data = numpy.array([[51.0, 52.0, 53.0, 54.0, 55.0],
                               [156.0, 157.0, 158.0, 159.0, 160.0],
                               [261.0, 262.0, 263.0, 264.0, 265.0],
                               [366.0, 367.0, 368.0, 369.0, 370.0],
                               [471.0, 472.0, 473.0, 474.0, 475.0]], dtype=np.float)

    multidim_coord_points = numpy.array([1000000., 3100000., 5200000., 7300000., 9400000.], dtype=np.float)

    assert_arrays_almost_equal(cube_out[0].data, result_data)
    assert_arrays_almost_equal(cube_out[1].data, result_data + 1)
    assert_arrays_almost_equal(cube_out[0].coord('surface_air_pressure').points, multidim_coord_points)
    assert_arrays_almost_equal(cube_out[1].coord('surface_air_pressure').points, multidim_coord_points)
def test_gridded_list_write_time_as_unlimited_dimension(self):
    data = GriddedDataList([make_from_cube(make_mock_cube(time_dim_length=7))])
    data[0].var_name = 'rain'

    data.save_data(tmp_file)

    self.d = Dataset(tmp_file)
    assert self.d.dimensions['time'].isunlimited()
def test_gridded_list_write_no_time_has_no_unlimited_dimension(self):
    data = GriddedDataList([make_from_cube(make_mock_cube())])
    data[0].var_name = 'rain'

    data.save_data(tmp_file)

    self.d = Dataset(tmp_file)
    for d in self.d.dimensions.values():
        assert not d.isunlimited()
def aggregate_gridded(self, kernel):
    # Make sure all coordinates have bounds - important for weighting and aggregating.
    # Only try and guess bounds on Dim Coords.
    for coord in self.data.coords(dim_coords=True):
        if not coord.has_bounds() and len(coord.points) > 1:
            coord.guess_bounds()
            logging.warning("Creating guessed bounds as none exist in file")
            new_coord_number = self.data.coord_dims(coord)
            self.data.remove_coord(coord.name())
            self.data.add_dim_coord(coord, new_coord_number)

    coords = []
    for coord in self.data.coords():
        grid, guessed_axis = self.get_grid(coord)
        if grid is not None:
            if isnan(grid.delta):
                logging.info('Aggregating on ' + coord.name() + ', collapsing completely and using ' +
                             kernel.cell_method + ' kernel.')
                coords.append(coord)
            else:
                raise NotImplementedError("Aggregation using partial collapse of "
                                          "coordinates is not supported for GriddedData")

    output = GriddedDataList([])
    if isinstance(kernel, MultiKernel):
        for sub_kernel in kernel.sub_kernels:
            sub_kernel_out = self._gridded_full_collapse(coords, sub_kernel)
            output.append_or_extend(sub_kernel_out)
    else:
        output.append_or_extend(self._gridded_full_collapse(coords, kernel))
    return output
def test_complete_collapse_two_dims_using_moments_kernel(self):
    self.kernel = aggregation_kernels['moments']
    data1 = make_from_cube(make_5x3_lon_lat_2d_cube_with_missing_data())
    data1.var_name = 'var1'
    data2 = make_from_cube(make_5x3_lon_lat_2d_cube_with_missing_data())
    data2.var_name = 'var2'
    data2.data += 10
    data = GriddedDataList([data1, data2])

    output = data.collapsed(['x', 'y'], how=self.kernel)

    expect_mean = numpy.array(7.75)
    expect_stddev = numpy.array(numpy.sqrt(244.25 / 11))
    expect_count = numpy.array(12)

    assert isinstance(output, GriddedDataList)
    assert len(output) == 6
    mean_1, stddev_1, count_1, mean_2, stddev_2, count_2 = output
    assert mean_1.var_name == 'var1'
    assert stddev_1.var_name == 'var1_std_dev'
    assert count_1.var_name == 'var1_num_points'
    assert mean_2.var_name == 'var2'
    assert stddev_2.var_name == 'var2_std_dev'
    assert count_2.var_name == 'var2_num_points'
    # Latitude area weighting means these aren't quite right so increase the rtol.
    assert numpy.allclose(mean_1.data, expect_mean, 1e-3)
    assert numpy.allclose(mean_2.data, expect_mean + 10, 1e-3)
    assert numpy.allclose(stddev_1.data, expect_stddev)
    assert numpy.allclose(stddev_2.data, expect_stddev)
    assert numpy.allclose(count_1.data, expect_count)
    assert numpy.allclose(count_2.data, expect_count)
def read_data_list(self, filenames, variables, product=None, aliases=None):
    """
    Read multiple data objects. Files can be either gridded or ungridded but not a mix of both.

    :param filenames: One or more filenames of the files to read
    :type filenames: string or list
    :param variables: One or more variables to read from the files
    :type variables: string or list
    :param str product: Name of data product to use (optional)
    :param aliases: List of variable aliases to put on each variable's data object as an alternative
        means of identifying them. (Optional)
    :return: A list of the data read out (either a GriddedDataList or UngriddedDataList depending on
        the type of data contained in the files)
    """
    # If filenames or variables are not lists, make them lists of 1 element.
    filenames = listify(filenames)
    variables = listify(variables)
    aliases = listify(aliases) if aliases else None

    variables = self._expand_wildcards(variables, filenames, product)

    data_list = None
    for idx, variable in enumerate(variables):
        var_data = self._get_data_func(filenames, variable, product)
        var_data.filenames = filenames
        if aliases:
            try:
                var_data.alias = aliases[idx]
            except IndexError:
                raise ValueError("Number of aliases does not match number of variables")
        if data_list is None:
            data_list = GriddedDataList() if var_data.is_gridded else UngriddedDataList()
        data_list.append(var_data)
    assert data_list is not None
    return data_list
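# Hedged usage sketch for read_data_list above. The file and variable names are
# hypothetical placeholders, and DataReader is assumed to be the class defining this
# method (it appears to live in cis.data_io.data_reader, as used by stats_cmd below).
def example_read_data_list():
    from cis.data_io.data_reader import DataReader

    reader = DataReader()
    # Read two variables from one (hypothetical) gridded NetCDF file and give each an alias.
    data_list = reader.read_data_list('example_model_output.nc',
                                      ['temperature', 'pressure'],
                                      aliases=['temp', 'pres'])
    for var_data in data_list:
        print(var_data.alias, var_data.var_name)
    return data_list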
def stats_cmd(main_arguments):
    """
    Main routine for handling calls to the statistics command.

    :param main_arguments: The command line arguments (minus the stats command)
    """
    from cis.stats import StatsAnalyzer
    from cis.data_io.gridded_data import GriddedDataList
    data_reader = DataReader()
    data_list = data_reader.read_datagroups(main_arguments.datagroups)
    analyzer = StatsAnalyzer(*data_list)
    results = analyzer.analyze()
    header = "RESULTS OF STATISTICAL COMPARISON:"
    note = "Compared all points which have non-missing values in both variables"
    header_length = max(len(header), len(note))
    print(header_length * '=')
    print(header)
    print(header_length * '-')
    print(note)
    print(header_length * '=')
    for result in results:
        print(result.pprint())
    if main_arguments.output:
        cubes = GriddedDataList([result.as_cube() for result in results])
        variables = []
        filenames = []
        for datagroup in main_arguments.datagroups:
            variables.extend(datagroup['variables'])
            filenames.extend(datagroup['filenames'])
        history = "Statistical comparison performed using CIS version " + __version__ + \
                  "\n variables: " + str(variables) + \
                  "\n from files: " + str(set(filenames))
        cubes.add_history(history)
        cubes.save_data(main_arguments.output)
def test_empty_longitude_subset_of_gridded_data_list_returns_no_data(self):
    """
    Checks that the convention of returning None when subsetting results in an empty subset is followed.
    Longitude has a modulus and so uses the IRIS intersection method.
    """
    data = GriddedDataList([cis.test.util.mock.make_square_5x3_2d_cube()])
    subset = data.subset(longitude=[1.0, 3.0])
    assert (subset is None)
def test_empty_time_subset_of_gridded_data_list_returns_no_data(self):
    """
    Checks that the convention of returning None when subsetting results in an empty subset is followed.
    Time has no modulus and so uses the IRIS extract method.
    """
    data = GriddedDataList([cis.test.util.mock.make_square_5x3_2d_cube_with_time()])
    subset = data.subset(time=[140500, 140550])
    assert (subset is None)
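# A minimal caller-side sketch of the convention exercised by the two tests above:
# subset() returns None when the constraint selects no data, so downstream code should
# check for None before using the result. Function and variable names are illustrative only.
def example_subset_or_skip(data_list, lon_range):
    subset = data_list.subset(longitude=lon_range)
    if subset is None:
        # Empty subset - nothing fell inside the requested longitude range.
        return None
    return subset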
def test_GIVEN_GriddedDataList_WHEN_constrain_THEN_correctly_subsetted_GriddedDataList_returned(self):
    gridded1 = cis.test.util.mock.make_square_5x3_2d_cube()
    gridded2 = cis.test.util.mock.make_square_5x3_2d_cube()
    datalist = GriddedDataList([gridded1, gridded2])

    subset = datalist.subset(longitude=[0.0, 5.0], latitude=[-5.0, 5.0])

    assert isinstance(subset, GriddedDataList)
    assert (subset[0].data.tolist() == [[5, 6], [8, 9], [11, 12]])
    assert (subset[1].data.tolist() == [[5, 6], [8, 9], [11, 12]])
def test_taylor_diagram_gridded(self):
    from cis.test.util.mock import make_mock_cube
    from cis.data_io.gridded_data import GriddedDataList

    d = GriddedDataList([make_mock_cube(), make_mock_cube(data_offset=2)])
    d[0].var_name = 'snow'
    d[1].var_name = 'rain'

    d.plot(how='taylor')
    self.check_graphic()
def test_iris_comparative_scatter(self):
    from cis.test.util.mock import make_mock_cube
    from cis.data_io.gridded_data import GriddedDataList

    d = GriddedDataList([make_mock_cube(), make_mock_cube(data_offset=2)])
    d[0].var_name = 'snow'
    d[1].var_name = 'rain'

    d.plot(how='comparativescatter')
    self.check_graphic()
def test_empty_time_subset_of_gridded_data_list_returns_no_data(self):
    """
    Checks that the convention of returning None when subsetting results in an empty subset is followed.
    Time has no modulus and so uses the IRIS extract method.
    """
    data = GriddedDataList([cis.test.util.mock.make_square_5x3_2d_cube_with_time()])
    long_coord = data.coord('time')

    constraint = subset_constraint.GriddedSubsetConstraint()
    constraint.set_limit(long_coord, 140500, 140550)
    subset = constraint.constrain(data)

    assert (subset is None)
def test_iris_multiple_scatter(self):
    from cis.test.util.mock import make_mock_cube
    from cis.data_io.gridded_data import GriddedDataList

    # This only works with one dimensional gridded data
    d = GriddedDataList([make_mock_cube(lat_dim_length=0),
                         make_mock_cube(lat_dim_length=0, data_offset=2)])
    d[0].var_name = 'snow'
    d[1].var_name = 'rain'

    # Will default to line plots
    d.plot()
    self.check_graphic()
def test_aggregate_mean(self):
    from cis.data_io.gridded_data import GriddedDataList, make_from_cube
    data1 = make_from_cube(make_mock_cube())
    data2 = make_from_cube(make_mock_cube(data_offset=1))
    datalist = GriddedDataList([data1, data2])

    cube_out = datalist.collapsed(['y'], how=self.kernel)

    result1 = numpy.array([7, 8, 9])
    result2 = result1 + 1

    assert isinstance(cube_out, GriddedDataList)
    # There is a small deviation to the weighting correction applied by Iris when completely collapsing
    assert_arrays_almost_equal(result1, cube_out[0].data)
    assert_arrays_almost_equal(result2, cube_out[1].data)
def test_collapse_vertical_coordinate(self):
    from cis.data_io.gridded_data import GriddedDataList, make_from_cube
    data1 = make_from_cube(make_mock_cube(alt_dim_length=6))
    data2 = make_from_cube(make_mock_cube(alt_dim_length=6, data_offset=1))
    datalist = GriddedDataList([data1, data2])

    cube_out = datalist.collapsed(['z'], how=self.kernel)

    result1 = data1.data.mean(axis=2)
    result2 = result1 + 1

    assert isinstance(cube_out, GriddedDataList)
    # There is a small deviation to the weighting correction applied by Iris when completely collapsing
    assert_arrays_almost_equal(result1, cube_out[0].data)
    assert_arrays_almost_equal(result2, cube_out[1].data)
    assert numpy.array_equal(data1.coords('latitude')[0].points, cube_out.coords('latitude')[0].points)
def _make_two_gridded(self):
    data1 = make_from_cube(mock.make_mock_cube())
    data2 = make_from_cube(mock.make_mock_cube(data_offset=10))
    data1.var_name = 'var1'
    data2._var_name = 'var2'
    data1.filenames = ['filename1']
    data2.filenames = ['filename2']
    self.data = [data1, data2]
    self.data = GriddedDataList([data1, data2])
def test_collapse_vertical_coordinate_weighted_aggregator(self):
    """
    We use a weighted aggregator, though no weights should be applied since we're only summing over the vertical
    """
    from cis.data_io.gridded_data import GriddedDataList, make_from_cube
    data1 = make_from_cube(make_mock_cube(alt_dim_length=6))
    data2 = make_from_cube(make_mock_cube(alt_dim_length=6, data_offset=1))
    datalist = GriddedDataList([data1, data2])

    cube_out = datalist.collapsed(['z'], how=iris.analysis.SUM)

    result1 = np.sum(data1.data, axis=2)
    result2 = np.sum(data2.data, axis=2)

    assert isinstance(cube_out, GriddedDataList)
    # There is a small deviation to the weighting correction applied by Iris when completely collapsing
    assert_arrays_almost_equal(result1, cube_out[0].data)
    assert_arrays_almost_equal(result2, cube_out[1].data)
    assert numpy.array_equal(data1.coords('latitude')[0].points, cube_out.coords('latitude')[0].points)
def __call__(self, kernel):
    from cis.data_io.gridded_data import GriddedDataList
    from cis.aggregation.collapse_kernels import MultiKernel

    # Make sure all coordinates have bounds - important for weighting and aggregating.
    # Only try and guess bounds on Dim Coords.
    for coord in self.data.coords(dim_coords=True):
        if not coord.has_bounds() and len(coord.points) > 1:
            coord.guess_bounds()
            logging.warning("Creating guessed bounds as none exist in file")
            new_coord_number = self.data.coord_dims(coord)
            self.data.remove_coord(coord.name())
            self.data.add_dim_coord(coord, new_coord_number)

    output = GriddedDataList([])
    if isinstance(kernel, MultiKernel):
        for sub_kernel in kernel.sub_kernels:
            sub_kernel_out = self._gridded_full_collapse(sub_kernel)
            output.append_or_extend(sub_kernel_out)
    else:
        output.append_or_extend(self._gridded_full_collapse(kernel))
    return output
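# Hedged usage sketch for the MultiKernel dispatch above: collapsing a two-variable
# GriddedDataList with a multi-kernel (e.g. the 'moments' kernel used by the tests in
# this section) yields one cube per (variable, sub-kernel) pair. The helper and
# parameter names here are illustrative only.
def example_moments_collapse(data_list, moments_kernel):
    # moments_kernel is assumed to be something like aggregation_kernels['moments'].
    output = data_list.collapsed(['x', 'y'], how=moments_kernel)
    # For two input variables this gives six cubes, named e.g.
    # var1, var1_std_dev, var1_num_points, var2, var2_std_dev, var2_num_points.
    return output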
def collocate(self, points, data, constraint, kernel):
    """
    :param points: cube defining the sample points
    :param data: CommonData object providing data to be collocated (or list of Data)
    :param constraint: instance of a Constraint subclass, which takes a data object and returns a subset of
        that data based on its internal parameters
    :param kernel: instance of a Kernel subclass which takes a number of points and returns a single value
    :return: GriddedDataList of collocated data
    """
    if isinstance(data, list):
        # If data is a list then call this method recursively over each element
        output_list = []
        for variable in data:
            collocated = self.collocate(points, variable, constraint, kernel)
            output_list.extend(collocated)
        return GriddedDataList(output_list)

    data_points = data.get_non_masked_points()

    # Work out how to iterate over the cube and map HyperPoint coordinates to cube coordinates.
    coord_map = make_coord_map(points, data)
    if self.missing_data_for_missing_sample and len(coord_map) != len(points.coords()):
        raise cis.exceptions.UserPrintableException(
            "A sample variable has been specified but not all coordinates in the data appear in the sample so "
            "there are multiple points in the sample data so whether the data is missing or not can not be "
            "determined")

    coords = points.coords()
    shape = []
    output_coords = []

    # Find shape of coordinates to be iterated over.
    for (hpi, ci, shi) in coord_map:
        coord = coords[ci]
        if coord.ndim > 1:
            raise NotImplementedError("Co-location of data onto a cube with a coordinate of dimension greater"
                                      " than one is not supported (coordinate %s)", coord.name())
        # Ensure that bounds exist.
        if not coord.has_bounds():
            logging.warning("Creating guessed bounds as none exist in file")
            coord.guess_bounds()
        shape.append(coord.shape[0])
        output_coords.append(coord)

    _fix_longitude_range(coords, data_points)

    # Create index if constraint supports it.
    data_index.create_indexes(constraint, coords, data_points, coord_map)
    data_index.create_indexes(kernel, points, data_points, coord_map)

    # Initialise output arrays as all masked, and set the appropriate fill value.
    values = []
    for i in range(kernel.return_size):
        val = np.ma.zeros(shape)
        val.mask = True
        val.fill_value = self.fill_value
        values.append(val)

    if kernel.return_size == 1:
        set_value_kernel = self._set_single_value_kernel
    else:
        set_value_kernel = self._set_multi_value_kernel

    logging.info("--> Co-locating...")

    if hasattr(kernel, "get_value_for_data_only") and hasattr(constraint, "get_iterator_for_data_only"):
        # Iterate over constrained cells
        iterator = constraint.get_iterator_for_data_only(
            self.missing_data_for_missing_sample, coord_map, coords, data_points, shape, points, values)
        for out_indices, data_values in iterator:
            try:
                kernel_val = kernel.get_value_for_data_only(data_values)
                set_value_kernel(kernel_val, values, out_indices)
            except ValueError:
                # ValueErrors are raised by Kernel when there are no points to operate on.
                # We don't need to do anything.
                pass
    else:
        # Iterate over constrained cells
        iterator = constraint.get_iterator(
            self.missing_data_for_missing_sample, coord_map, coords, data_points, shape, points, values)
        for out_indices, hp, con_points in iterator:
            try:
                kernel_val = kernel.get_value(hp, con_points)
                set_value_kernel(kernel_val, values, out_indices)
            except ValueError:
                # ValueErrors are raised by Kernel when there are no points to operate on.
                # We don't need to do anything.
                pass

    # Construct an output cube containing the collocated data.
    kernel_var_details = kernel.get_variable_details(data.var_name, data.long_name, data.standard_name,
                                                     data.units)

    output = GriddedDataList([])
    for idx, val in enumerate(values):
        cube = self._create_collocated_cube(data, val, output_coords)
        data_with_nan_and_inf_removed = np.ma.masked_invalid(cube.data)
        data_with_nan_and_inf_removed.set_fill_value(self.fill_value)
        cube.data = data_with_nan_and_inf_removed
        cube.var_name = kernel_var_details[idx][0]
        cube.long_name = kernel_var_details[idx][1]
        cis.utils.set_cube_standard_name_if_valid(cube, kernel_var_details[idx][2])
        try:
            cube.units = kernel_var_details[idx][3]
        except ValueError:
            logging.warn(
                "Units are not cf compliant, not setting them. Units {}".format(kernel_var_details[idx][3]))

        # Sort the cube into the correct shape, so that the order of coordinates
        # is the same as in the source data
        coord_map = sorted(coord_map, key=lambda x: x[1])
        transpose_order = [coord[2] for coord in coord_map]
        cube.transpose(transpose_order)
        output.append(cube)

    return output
def collocate(self, points, data, constraint, kernel):
    """
    :param points: cube defining the sample points
    :param data: CommonData object providing data to be collocated (or list of Data)
    :param constraint: instance of a Constraint subclass, which takes a data object and returns a subset of
        that data based on its internal parameters
    :param kernel: instance of a Kernel subclass which takes a number of points and returns a single value
    :return: GriddedDataList of collocated data
    """
    log_memory_profile("GeneralGriddedCollocator Initial")

    if isinstance(data, list):
        # If data is a list then call this method recursively over each element
        output_list = []
        for variable in data:
            collocated = self.collocate(points, variable, constraint, kernel)
            output_list.extend(collocated)
        return GriddedDataList(output_list)

    data_points = data.get_non_masked_points()
    log_memory_profile("GeneralGriddedCollocator Created data hyperpoint list view")

    # Work out how to iterate over the cube and map HyperPoint coordinates to cube coordinates.
    coord_map = make_coord_map(points, data)
    if self.missing_data_for_missing_sample and len(coord_map) != len(points.coords()):
        raise cis.exceptions.UserPrintableException(
            "A sample variable has been specified but not all coordinates in the data appear in the sample so "
            "there are multiple points in the sample data so whether the data is missing or not can not be "
            "determined")

    coords = points.coords()
    shape = []
    output_coords = []

    # Find shape of coordinates to be iterated over.
    for (hpi, ci, shi) in coord_map:
        coord = coords[ci]
        if coord.ndim > 1:
            raise NotImplementedError("Co-location of data onto a cube with a coordinate of dimension greater"
                                      " than one is not supported (coordinate %s)", coord.name())
        # Ensure that bounds exist.
        if not coord.has_bounds():
            logging.warning("Creating guessed bounds as none exist in file")
            coord.guess_bounds()
        shape.append(coord.shape[0])
        output_coords.append(coord)

    _fix_longitude_range(coords, data_points)
    log_memory_profile("GeneralGriddedCollocator Created output coord map")

    # Create index if constraint supports it.
    data_index.create_indexes(constraint, coords, data_points, coord_map)
    data_index.create_indexes(kernel, points, data_points, coord_map)
    log_memory_profile("GeneralGriddedCollocator Created indexes")

    # Initialise output arrays as all masked, and set the appropriate fill value.
    values = []
    for i in range(kernel.return_size):
        val = np.ma.zeros(shape)
        val.mask = True
        val.fill_value = self.fill_value
        values.append(val)

    if kernel.return_size == 1:
        set_value_kernel = self._set_single_value_kernel
    else:
        set_value_kernel = self._set_multi_value_kernel

    logging.info("--> Co-locating...")

    if hasattr(kernel, "get_value_for_data_only") and hasattr(constraint, "get_iterator_for_data_only"):
        # Iterate over constrained cells
        iterator = constraint.get_iterator_for_data_only(
            self.missing_data_for_missing_sample, coord_map, coords, data_points, shape, points, values)
        for out_indices, data_values in iterator:
            try:
                kernel_val = kernel.get_value_for_data_only(data_values)
                set_value_kernel(kernel_val, values, out_indices)
            except ValueError:
                # ValueErrors are raised by Kernel when there are no points to operate on.
                # We don't need to do anything.
                pass
    else:
        # Iterate over constrained cells
        iterator = constraint.get_iterator(
            self.missing_data_for_missing_sample, coord_map, coords, data_points, shape, points, values)
        for out_indices, hp, con_points in iterator:
            try:
                kernel_val = kernel.get_value(hp, con_points)
                set_value_kernel(kernel_val, values, out_indices)
            except ValueError:
                # ValueErrors are raised by Kernel when there are no points to operate on.
                # We don't need to do anything.
                pass
    log_memory_profile("GeneralGriddedCollocator Completed collocation")

    # Construct an output cube containing the collocated data.
    kernel_var_details = kernel.get_variable_details(self.var_name or data.var_name,
                                                     self.var_long_name or data.long_name,
                                                     data.standard_name,
                                                     self.var_units or data.units)

    output = GriddedDataList([])
    for idx, val in enumerate(values):
        cube = self._create_collocated_cube(data, val, output_coords)
        data_with_nan_and_inf_removed = np.ma.masked_invalid(cube.data)
        data_with_nan_and_inf_removed.set_fill_value(self.fill_value)
        cube.data = data_with_nan_and_inf_removed
        cube.var_name = kernel_var_details[idx][0]
        cube.long_name = kernel_var_details[idx][1]
        set_standard_name_if_valid(cube, kernel_var_details[idx][2])
        try:
            cube.units = kernel_var_details[idx][3]
        except ValueError:
            logging.warn(
                "Units are not cf compliant, not setting them. Units {}".format(kernel_var_details[idx][3]))

        # Sort the cube into the correct shape, so that the order of coordinates
        # is the same as in the source data
        coord_map = sorted(coord_map, key=lambda x: x[1])
        transpose_order = [coord[2] for coord in coord_map]
        cube.transpose(transpose_order)
        output.append(cube)
    log_memory_profile("GeneralGriddedCollocator Finished")

    return output
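# Hedged usage sketch for the gridded-output collocator above. The object names are
# illustrative assumptions; check the cis.collocation modules for the exact constraint
# and kernel classes available.
def example_collocate_onto_grid(sample_cube, ungridded_data, collocator, constraint, kernel):
    # collocator is assumed to be an instance of the class defining collocate() above
    # (GeneralGriddedCollocator), already configured with fill_value and
    # missing_data_for_missing_sample.
    output = collocator.collocate(sample_cube, ungridded_data, constraint, kernel)
    # The result is a GriddedDataList with one cube per kernel return value, shaped
    # like sample_cube and masked wherever no data points fell inside a cell.
    return output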
def setUp(self):
    self.calc = Calculator()
    self.data = GriddedDataList([make_from_cube(mock.make_mock_cube())])
    self.data[0].var_name = 'var_name'
def collocate(self, points, data, constraint, kernel):
    """
    This collocator takes two Iris cubes, and collocates from the data cube onto the grid of the 'points' cube.
    The collocator then returns another Iris cube.

    :param points: An Iris cube with the sampling grid to collocate onto.
    :param data: The Iris cube with the data to be collocated.
    :param constraint: Only None is allowed at present, as a constraint is unlikely to be required for
        gridded-gridded collocation.
    :param kernel: The kernel to use, current options are gridded_gridded_nn and gridded_gridded_li.
    :return: An Iris cube with the collocated data.
    """
    self._check_for_valid_kernel(kernel)

    # Force the data longitude range to be the same as that of the sample grid.
    _fix_longitude_range(points.coords(), data)

    # Initialise variables used to create an output mask based on the sample data mask.
    sample_coord_lookup = {}  # Maps coordinate in sample data -> location in dimension order
    for idx, coord in enumerate(points.coords()):
        sample_coord_lookup[coord] = idx
    sample_coord_transpose_map = []  # For coords in both sample and data, contains the position in the sample
    other_coord_transpose_map = []  # For coords in data but not in sample, records that coord's position in data
    repeat_size = 1
    output_mask = np.ma.nomask

    # Make a list of the coordinates we have, with each entry containing a list with the name of the coordinate
    # and the number of points along its axis. One is for the sample grid, which contains the points where we
    # interpolate to, and one is for the output grid, which will additionally contain any dimensions missing in
    # the sample grid.
    coord_names_and_sizes_for_sample_grid = []
    coord_names_and_sizes_for_output_grid = []
    for idx, coord in enumerate(data.coords(dim_coords=True)):
        # First try and find the coordinate in points, the sample grid. If an exception is thrown, it means that
        # name does not appear in the sample grid, and instead take the coordinate name and length from the
        # original data, as this is what we will be keeping.
        try:
            sample_coord = points.coords(coord.name())[0]
            coord_names_and_sizes_for_sample_grid.append([coord.name(), len(sample_coord.points)])
            # Find the index of the sample coordinate corresponding to the data coordinate.
            sample_coord_transpose_map.append(sample_coord_lookup[sample_coord])
        except IndexError:
            coord_names_and_sizes_for_output_grid.append([coord.name(), len(coord.points)])
            repeat_size *= len(coord.points)
            other_coord_transpose_map.append(idx)

    # Now we sort the sample coordinates so that they are in the same order as in the sample file,
    # rather than the order of the data file (that's the order we want the output dimensions).
    coord_names_and_sizes_for_sample_grid = [x[0] for x in sorted(zip(coord_names_and_sizes_for_sample_grid,
                                                                      sample_coord_transpose_map),
                                                                  key=lambda t: t[1])]

    # Adding the lists together in this way ensures that the coordinates not in the sample grid appear in the
    # final position, which is important for adding the points from the Iris interpolator to the new array. The
    # data returned from the Iris interpolator method will have dimensions of these missing coordinates, which
    # need to be the final dimensions in the numpy array, as the iterator will give the position of the other
    # dimensions.
    coord_names_and_sizes_for_output_grid = coord_names_and_sizes_for_sample_grid + \
        coord_names_and_sizes_for_output_grid

    # An array for the collocated data, with the correct shape
    output_shape = tuple(i[1] for i in coord_names_and_sizes_for_output_grid)
    new_data = np.zeros(output_shape)

    if self.missing_data_for_missing_sample:
        output_mask = self._make_output_mask(coord_names_and_sizes_for_sample_grid, output_shape, points,
                                             repeat_size)

    # Now recreate the points cube, while ignoring any DimCoords in points that are not in the data cube
    new_dim_coord_list = []
    new_points_array_shape = []
    for i in range(0, len(coord_names_and_sizes_for_output_grid)):
        # Try and find the coordinate in the sample grid
        coord_found = points.coords(coord_names_and_sizes_for_output_grid[i][0])

        # If the coordinate exists in the sample grid then append the new coordinate to the list. Iris requires
        # this be given as a DimCoord object, along with an axis number, in a tuple pair.
        if len(coord_found) != 0:
            new_dim_coord_list.append((coord_found[0], len(new_dim_coord_list)))
            new_points_array_shape.append(coord_found[0].points.size)

    new_points_array = np.zeros(tuple(new_points_array_shape))

    # Use the new_points_array to recreate points, without the DimCoords not in the data cube
    points = iris.cube.Cube(new_points_array, dim_coords_and_dims=new_dim_coord_list)

    output_cube = self._iris_interpolate(coord_names_and_sizes_for_output_grid,
                                         coord_names_and_sizes_for_sample_grid, data, kernel, output_mask,
                                         points, self.extrapolate)

    if not isinstance(output_cube, list):
        return GriddedDataList([output_cube])
    else:
        return output_cube
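# Hedged usage sketch for the gridded-gridded collocator above: the data cube is
# interpolated onto the grid of the sample cube, and any data dimensions missing from
# the sample are preserved in the output. Object names are illustrative; the kernel is
# one of the gridded_gridded_nn / gridded_gridded_li options named in the docstring.
def example_gridded_gridded_collocate(sample_cube, data_cube, collocator, kernel):
    output = collocator.collocate(sample_cube, data_cube, constraint=None, kernel=kernel)
    # Returns a GriddedDataList (or list of cubes) on the sample grid.
    return output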