def test_GIVEN_single_masked_point_in_cube_WHEN_iterate_THEN_return_no_points(self):
    sample_cube = make_square_5x3_2d_cube_with_time(offset=0, time_offset=0)
    data_point = make_dummy_ungridded_data_single_point(0.5, 0.5, 1.2,
                                                        time=datetime.datetime(1984, 8, 28, 0, 0), mask=True)

    coord_map = make_coord_map(sample_cube, data_point)
    coords = sample_cube.coords()
    for (hpi, ci, shi) in coord_map:
        coord = coords[ci]
        if coord.ndim > 1:
            raise NotImplementedError("Co-location of data onto a cube with a coordinate of dimension greater"
                                      " than one is not supported (coordinate %s)" % coord.name())
        # Ensure that bounds exist.
        if not coord.has_bounds():
            coord.guess_bounds()

    constraint = BinnedCubeCellOnlyConstraint()
    data_index.create_indexes(constraint, coords, data_point.get_non_masked_points(), coord_map)
    iterator = constraint.get_iterator(False, coord_map, coords, data_point.get_non_masked_points(), None,
                                       sample_cube, None)

    final_points_index = [(out_index, hp, points) for out_index, hp, points in iterator]

    assert_that(len(final_points_index), is_(0), "Masked points should not be iterated over")
def test_GIVEN_single_point_in_cube_WHEN_iterate_THEN_return_point_in_middle(self):
    sample_cube = make_square_5x3_2d_cube_with_time(offset=0, time_offset=0)
    data_point = make_dummy_ungridded_data_single_point(0.5, 0.5, 1.2,
                                                        time=datetime.datetime(1984, 8, 28, 0, 0))

    coord_map = make_coord_map(sample_cube, data_point)
    coords = sample_cube.coords()
    for (hpi, ci, shi) in coord_map:
        coord = coords[ci]
        if coord.ndim > 1:
            raise NotImplementedError("Co-location of data onto a cube with a coordinate of dimension greater"
                                      " than one is not supported (coordinate %s)" % coord.name())
        # Ensure that bounds exist.
        if not coord.has_bounds():
            coord.guess_bounds()

    constraint = BinnedCubeCellOnlyConstraint()
    data_index.create_indexes(constraint, coords, data_point.get_non_masked_points(), coord_map)
    iterator = constraint.get_iterator(False, coord_map, coords, data_point.get_non_masked_points(), None,
                                       sample_cube, None)

    final_points_index = [(out_index, hp, points) for out_index, hp, points in iterator]

    assert_that(len(final_points_index), is_(1), "There is one mapping from sample_cube to the final grid")
    assert_that(final_points_index[0][0], is_((2, 1, 1)), "The point should map to the centre cell index")
    assert_that(final_points_index[0][1], is_(HyperPoint(lat=0, lon=0, t=datetime.datetime(1984, 8, 28))),
                "The sample point should be the centre of the cell")
    assert_that(final_points_index[0][2].latitudes, is_([0.5]), "The binned point should keep its latitude")
    assert_that(final_points_index[0][2].longitudes, is_([0.5]), "The binned point should keep its longitude")
    assert_that(final_points_index[0][2].times,
                is_([convert_datetime_to_std_time(datetime.datetime(1984, 8, 28, 0, 0))]),
                "The binned point should keep its time")
    assert_that(final_points_index[0][2].vals, is_([1.2]), "The binned point should keep its value")
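# --- Illustrative sketch (not part of the original tests) --------------------
# Why the point at (0.5, 0.5) maps to output index (2, 1, 1): assuming the
# make_square_5x3_2d_cube_with_time helper builds latitudes [-10, -5, 0, 5, 10]
# and longitudes [-5, 0, 5] (inferred from the expected cell centre of
# lat=0, lon=0 above), binning against guessed midpoint bounds selects the
# cell whose centre is nearest. The bounds logic below is a plain-numpy
# approximation of what coord.guess_bounds() produces for a regular grid.
import numpy as np

lats = np.array([-10., -5., 0., 5., 10.])
lons = np.array([-5., 0., 5.])

def guessed_bin(cell_centres, value):
    # Midpoints between neighbouring cell centres act as the bin edges.
    edges = (cell_centres[:-1] + cell_centres[1:]) / 2.0
    return int(np.digitize(value, edges))

assert guessed_bin(lats, 0.5) == 2   # third latitude cell, centred on 0
assert guessed_bin(lons, 0.5) == 1   # second longitude cell, centred on 0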
def collocate(self, points, data, constraint, kernel):
    """
    :param points: cube defining the sample points
    :param data: CommonData object providing data to be collocated (or list of Data)
    :param constraint: instance of a Constraint subclass, which takes a data object and returns a subset of that
        data based on its internal parameters
    :param kernel: instance of a Kernel subclass which takes a number of points and returns a single value
    :return: GriddedDataList of collocated data
    """
    log_memory_profile("GeneralGriddedCollocator Initial")

    if isinstance(data, list):
        # If data is a list then call this method recursively over each element
        output_list = []
        for variable in data:
            collocated = self.collocate(points, variable, constraint, kernel)
            output_list.extend(collocated)
        return GriddedDataList(output_list)

    data_points = data.get_non_masked_points()

    log_memory_profile("GeneralGriddedCollocator Created data hyperpoint list view")

    # Work out how to iterate over the cube and map HyperPoint coordinates to cube coordinates.
    coord_map = make_coord_map(points, data)
    if self.missing_data_for_missing_sample and len(coord_map) != len(points.coords()):
        raise cis.exceptions.UserPrintableException(
            "A sample variable has been specified but not all coordinates in the data appear in the sample, so "
            "there are multiple points in the sample data and whether the data is missing or not cannot be "
            "determined")

    coords = points.coords()
    shape = []
    output_coords = []

    # Find shape of coordinates to be iterated over.
    for (hpi, ci, shi) in coord_map:
        coord = coords[ci]
        if coord.ndim > 1:
            raise NotImplementedError("Co-location of data onto a cube with a coordinate of dimension greater"
                                      " than one is not supported (coordinate %s)" % coord.name())
        # Ensure that bounds exist.
        if not coord.has_bounds():
            logging.warning("Creating guessed bounds as none exist in file")
            coord.guess_bounds()
        shape.append(coord.shape[0])
        output_coords.append(coord)

    _fix_longitude_range(coords, data_points)
    log_memory_profile("GeneralGriddedCollocator Created output coord map")

    # Create index if constraint supports it.
    data_index.create_indexes(constraint, coords, data_points, coord_map)
    data_index.create_indexes(kernel, points, data_points, coord_map)
    log_memory_profile("GeneralGriddedCollocator Created indexes")

    # Initialise output array as initially all masked, and set the appropriate fill value.
    values = []
    for i in range(kernel.return_size):
        val = np.ma.zeros(shape)
        val.mask = True
        val.fill_value = self.fill_value
        values.append(val)

    if kernel.return_size == 1:
        set_value_kernel = self._set_single_value_kernel
    else:
        set_value_kernel = self._set_multi_value_kernel

    logging.info("--> Co-locating...")

    if hasattr(kernel, "get_value_for_data_only") and hasattr(constraint, "get_iterator_for_data_only"):
        # Iterate over constrained cells
        iterator = constraint.get_iterator_for_data_only(
            self.missing_data_for_missing_sample, coord_map, coords, data_points, shape, points, values)
        for out_indices, data_values in iterator:
            try:
                kernel_val = kernel.get_value_for_data_only(data_values)
                set_value_kernel(kernel_val, values, out_indices)
            except ValueError:
                # ValueErrors are raised by Kernel when there are no points to operate on.
                # We don't need to do anything.
                pass
    else:
        # Iterate over constrained cells
        iterator = constraint.get_iterator(
            self.missing_data_for_missing_sample, coord_map, coords, data_points, shape, points, values)
        for out_indices, hp, con_points in iterator:
            try:
                kernel_val = kernel.get_value(hp, con_points)
                set_value_kernel(kernel_val, values, out_indices)
            except ValueError:
                # ValueErrors are raised by Kernel when there are no points to operate on.
                # We don't need to do anything.
                pass

    log_memory_profile("GeneralGriddedCollocator Completed collocation")

    # Construct an output cube containing the collocated data.
    kernel_var_details = kernel.get_variable_details(self.var_name or data.var_name,
                                                     self.var_long_name or data.long_name,
                                                     data.standard_name,
                                                     self.var_units or data.units)
    output = GriddedDataList([])
    for idx, val in enumerate(values):
        cube = self._create_collocated_cube(data, val, output_coords)
        data_with_nan_and_inf_removed = np.ma.masked_invalid(cube.data)
        data_with_nan_and_inf_removed.set_fill_value(self.fill_value)
        cube.data = data_with_nan_and_inf_removed
        cube.var_name = kernel_var_details[idx][0]
        cube.long_name = kernel_var_details[idx][1]
        set_standard_name_if_valid(cube, kernel_var_details[idx][2])
        try:
            cube.units = kernel_var_details[idx][3]
        except ValueError:
            logging.warning(
                "Units are not CF compliant, not setting them. Units {}".format(kernel_var_details[idx][3]))

        # Sort the cube into the correct shape, so that the order of coordinates
        # is the same as in the source data
        coord_map = sorted(coord_map, key=lambda x: x[1])
        transpose_order = [coord[2] for coord in coord_map]
        cube.transpose(transpose_order)
        output.append(cube)

    log_memory_profile("GeneralGriddedCollocator Finished")

    return output
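# --- Illustrative sketch (not part of the collocator) ------------------------
# The output-initialisation pattern used above: start with a fully masked
# array so that any cell the kernel never writes to stays masked, and attach
# the collocator's fill value for when the mask is filled on output. This is
# plain numpy; the shape and fill value are made up for the example.
import numpy as np

shape = (5, 3)
val = np.ma.zeros(shape)
val.mask = True          # everything masked until a kernel value is assigned
val.fill_value = -999.0

val[2, 1] = 1.2          # assignment unmasks just this cell
print(val.filled())      # unwritten cells appear as -999.0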
def collocate(self, points, data, constraint, kernel):
    """
    This collocator takes a list of HyperPoints and a data object (currently either Ungridded data or a Cube)
    and returns one new LazyData object with the values as determined by the constraint and kernel objects.
    The metadata for the output LazyData object is copied from the input data object.

    :param UngriddedData or UngriddedCoordinates points: Object defining the sample points
    :param UngriddedData data: The source data to collocate from
    :param constraint: An instance of a Constraint subclass which takes a data object and returns a subset of
        that data based on its internal parameters
    :param kernel: An instance of a Kernel subclass which takes a number of points and returns a single value
    :return UngriddedData or UngriddedDataList: Depending on the input
    """
    log_memory_profile("GeneralUngriddedCollocator Initial")

    if isinstance(data, list):
        # Indexing and constraints (for SepConstraintKdTree) will only take place on the first iteration,
        # so we really can just call this method recursively if we've got a list of data.
        output = UngriddedDataList()
        for var in data:
            output.extend(self.collocate(points, var, constraint, kernel))
        return output

    # First fix the sample points so that they all fall within the same 360 degree longitude range
    _fix_longitude_range(points.coords(), points)
    # Then fix the data points so that they fall onto the same 360 degree longitude range as the sample points
    _fix_longitude_range(points.coords(), data)

    # Convert to dataframes for fancy indexing
    sample_points = points.as_data_frame(time_index=False, name='vals')
    data_points = data.as_data_frame(time_index=False, name='vals').dropna(axis=0)

    log_memory_profile("GeneralUngriddedCollocator after data retrieval")

    # Create index if constraint and/or kernel require one.
    coord_map = None
    data_index.create_indexes(constraint, points, data_points, coord_map)
    log_memory_profile("GeneralUngriddedCollocator after indexing")

    logging.info("--> Collocating...")

    # Create output arrays.
    self.var_name = data.var_name
    self.var_long_name = data.long_name
    self.var_standard_name = data.standard_name
    self.var_units = data.units
    var_set_details = kernel.get_variable_details(self.var_name, self.var_long_name,
                                                  self.var_standard_name, self.var_units)

    sample_points_count = len(sample_points)
    # Create an empty masked array to store the collocated values. The elements will be unmasked by assignment.
    values = np.ma.masked_all((len(var_set_details), sample_points_count))
    values.fill_value = self.fill_value
    log_memory_profile("GeneralUngriddedCollocator after output array creation")

    logging.info(" {} sample points".format(sample_points_count))

    # Apply constraint and/or kernel to each sample point.
    if isinstance(kernel, nn_horizontal_only):
        # Only find the nearest point using the kd-tree, without constraining in other dimensions
        nearest_points = data_points.iloc[
            constraint.haversine_distance_kd_tree_index.find_nearest_point(sample_points)]
        values[0, :] = nearest_points.vals.values
    else:
        for i, point, con_points in constraint.get_iterator(self.missing_data_for_missing_sample, None, None,
                                                            data_points, None, sample_points, None):
            try:
                # Kernel returns either a single value or a tuple of values to insert into each output variable.
                values[:, i] = kernel.get_value(point, con_points)
            except CoordinateMultiDimError as e:
                raise NotImplementedError(e)
            except ValueError:
                pass

    log_memory_profile("GeneralUngriddedCollocator after running kernel on sample points")

    # Mask any bad values
    values = np.ma.masked_invalid(values)

    return_data = UngriddedDataList()
    for idx, var_details in enumerate(var_set_details):
        var_metadata = Metadata(name=var_details[0], long_name=var_details[1], shape=(len(sample_points),),
                                missing_value=self.fill_value, units=var_details[3])
        set_standard_name_if_valid(var_metadata, var_details[2])
        return_data.append(UngriddedData(values[idx, :], var_metadata, points.coords()))

    log_memory_profile("GeneralUngriddedCollocator final")

    return return_data
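# --- Illustrative sketch (not part of the collocator) ------------------------
# The nn_horizontal_only fast path above looks up every sample point's nearest
# data point in one vectorised kd-tree query. The real index uses haversine
# (great-circle) distance; this sketch uses scipy's cKDTree with plain
# Euclidean distance on (lat, lon) purely to show the shape of the operation.
import numpy as np
from scipy.spatial import cKDTree

data_latlon = np.array([[0.0, 0.0], [10.0, 10.0], [20.0, 20.0]])
sample_latlon = np.array([[1.0, 1.0], [19.0, 21.0]])

tree = cKDTree(data_latlon)
_, nearest_idx = tree.query(sample_latlon)   # index of nearest data point per sample
print(nearest_idx)                           # -> [0 2]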
def collocate(self, points, data, constraint, kernel):
    """
    :param points: cube defining the sample points
    :param data: CommonData object providing data to be collocated (or list of Data)
    :param constraint: instance of a Constraint subclass, which takes a data object and returns a subset of that
        data based on its internal parameters
    :param kernel: instance of a Kernel subclass which takes a number of points and returns a single value
    :return: GriddedDataList of collocated data
    """
    if isinstance(data, list):
        # If data is a list then call this method recursively over each element
        output_list = []
        for variable in data:
            collocated = self.collocate(points, variable, constraint, kernel)
            output_list.extend(collocated)
        return GriddedDataList(output_list)

    data_points = data.get_non_masked_points()

    # Work out how to iterate over the cube and map HyperPoint coordinates to cube coordinates.
    coord_map = make_coord_map(points, data)
    if self.missing_data_for_missing_sample and len(coord_map) != len(points.coords()):
        raise cis.exceptions.UserPrintableException(
            "A sample variable has been specified but not all coordinates in the data appear in the sample, so "
            "there are multiple points in the sample data and whether the data is missing or not cannot be "
            "determined")

    coords = points.coords()
    shape = []
    output_coords = []

    # Find shape of coordinates to be iterated over.
    for (hpi, ci, shi) in coord_map:
        coord = coords[ci]
        if coord.ndim > 1:
            raise NotImplementedError("Co-location of data onto a cube with a coordinate of dimension greater"
                                      " than one is not supported (coordinate %s)" % coord.name())
        # Ensure that bounds exist.
        if not coord.has_bounds():
            logging.warning("Creating guessed bounds as none exist in file")
            coord.guess_bounds()
        shape.append(coord.shape[0])
        output_coords.append(coord)

    _fix_longitude_range(coords, data_points)

    # Create index if constraint supports it.
    data_index.create_indexes(constraint, coords, data_points, coord_map)
    data_index.create_indexes(kernel, points, data_points, coord_map)

    # Initialise output array as initially all masked, and set the appropriate fill value.
    values = []
    for i in range(kernel.return_size):
        val = np.ma.zeros(shape)
        val.mask = True
        val.fill_value = self.fill_value
        values.append(val)

    if kernel.return_size == 1:
        set_value_kernel = self._set_single_value_kernel
    else:
        set_value_kernel = self._set_multi_value_kernel

    logging.info("--> Co-locating...")

    if hasattr(kernel, "get_value_for_data_only") and hasattr(constraint, "get_iterator_for_data_only"):
        # Iterate over constrained cells
        iterator = constraint.get_iterator_for_data_only(
            self.missing_data_for_missing_sample, coord_map, coords, data_points, shape, points, values)
        for out_indices, data_values in iterator:
            try:
                kernel_val = kernel.get_value_for_data_only(data_values)
                set_value_kernel(kernel_val, values, out_indices)
            except ValueError:
                # ValueErrors are raised by Kernel when there are no points to operate on.
                # We don't need to do anything.
                pass
    else:
        # Iterate over constrained cells
        iterator = constraint.get_iterator(
            self.missing_data_for_missing_sample, coord_map, coords, data_points, shape, points, values)
        for out_indices, hp, con_points in iterator:
            try:
                kernel_val = kernel.get_value(hp, con_points)
                set_value_kernel(kernel_val, values, out_indices)
            except ValueError:
                # ValueErrors are raised by Kernel when there are no points to operate on.
                # We don't need to do anything.
                pass

    # Construct an output cube containing the collocated data.
    kernel_var_details = kernel.get_variable_details(data.var_name, data.long_name, data.standard_name,
                                                     data.units)
    output = GriddedDataList([])
    for idx, val in enumerate(values):
        cube = self._create_collocated_cube(data, val, output_coords)
        data_with_nan_and_inf_removed = np.ma.masked_invalid(cube.data)
        data_with_nan_and_inf_removed.set_fill_value(self.fill_value)
        cube.data = data_with_nan_and_inf_removed
        cube.var_name = kernel_var_details[idx][0]
        cube.long_name = kernel_var_details[idx][1]
        cis.utils.set_cube_standard_name_if_valid(cube, kernel_var_details[idx][2])
        try:
            cube.units = kernel_var_details[idx][3]
        except ValueError:
            logging.warning(
                "Units are not CF compliant, not setting them. Units {}".format(kernel_var_details[idx][3]))

        # Sort the cube into the correct shape, so that the order of coordinates
        # is the same as in the source data
        coord_map = sorted(coord_map, key=lambda x: x[1])
        transpose_order = [coord[2] for coord in coord_map]
        cube.transpose(transpose_order)
        output.append(cube)

    return output
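# --- Illustrative sketch (not part of the collocator) ------------------------
# The final transpose above re-orders the collocated cube's dimensions to
# match the source data: coord_map triples are (hyperpoint index, coordinate
# index, shape index), so sorting by the coordinate index and reading off the
# shape indices yields the transpose order. A plain-numpy stand-in with
# hypothetical triples:
import numpy as np

coord_map = [(0, 2, 1), (1, 0, 0), (2, 1, 2)]
transpose_order = [shi for _, _, shi in sorted(coord_map, key=lambda x: x[1])]

arr = np.zeros((5, 3, 2))
print(np.transpose(arr, transpose_order).shape)   # axes [0, 2, 1] -> (5, 2, 3)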
def collocate(self, points, data, constraint, kernel):
    """
    This collocator takes a list of HyperPoints and a data object (currently either Ungridded data or a Cube)
    and returns one new LazyData object with the values as determined by the constraint and kernel objects.
    The metadata for the output LazyData object is copied from the input data object.

    :param points: UngriddedData or UngriddedCoordinates defining the sample points
    :param data: An UngriddedData object or Cube, or any other object containing metadata that the constraint
        object can read. May also be a list of objects, in which case a list will be returned
    :param constraint: An instance of a Constraint subclass which takes a data object and returns a subset of
        that data based on its internal parameters
    :param kernel: An instance of a Kernel subclass which takes a number of points and returns a single value
    :return: A single LazyData object
    """
    log_memory_profile("GeneralUngriddedCollocator Initial")

    if isinstance(data, list):
        # Indexing and constraints (for SepConstraintKdTree) will only take place on the first iteration,
        # so we really can just call this method recursively if we've got a list of data.
        output = UngriddedDataList()
        for var in data:
            output.extend(self.collocate(points, var, constraint, kernel))
        return output

    metadata = data.metadata

    sample_points = points.get_all_points()

    # Convert ungridded data to a list of points if kernel needs it.
    # Special case checks for kernels that use a cube - this could be done more elegantly.
    if isinstance(kernel, nn_gridded) or isinstance(kernel, li):
        if hasattr(kernel, "interpolator"):
            # If we have an interpolator on the kernel we need to reset it as it depends on the actual values
            # as well as the coordinates
            kernel.interpolator = None
            kernel.coord_names = []
        if not isinstance(data, iris.cube.Cube):
            raise ValueError("Ungridded data cannot be used with kernel nn_gridded or li")
        if constraint is not None and not isinstance(constraint, DummyConstraint):
            raise ValueError("A constraint cannot be specified with kernel nn_gridded or li")
        data_points = data
    else:
        data_points = data.get_non_masked_points()

    # First fix the sample points so that they all fall within the same 360 degree longitude range
    _fix_longitude_range(points.coords(), sample_points)
    # Then fix the data points so that they fall onto the same 360 degree longitude range as the sample points
    _fix_longitude_range(points.coords(), data_points)

    log_memory_profile("GeneralUngriddedCollocator after data retrieval")

    # Create index if constraint and/or kernel require one.
    coord_map = None
    data_index.create_indexes(constraint, points, data_points, coord_map)
    data_index.create_indexes(kernel, points, data_points, coord_map)
    log_memory_profile("GeneralUngriddedCollocator after indexing")

    logging.info("--> Collocating...")

    # Create output arrays.
    self.var_name = data.name()
    self.var_long_name = metadata.long_name
    self.var_standard_name = metadata.standard_name
    self.var_units = data.units
    var_set_details = kernel.get_variable_details(self.var_name, self.var_long_name,
                                                  self.var_standard_name, self.var_units)

    sample_points_count = len(sample_points)
    values = np.zeros((len(var_set_details), sample_points_count)) + self.fill_value
    log_memory_profile("GeneralUngriddedCollocator after output array creation")

    logging.info(" {} sample points".format(sample_points_count))

    # Apply constraint and/or kernel to each sample point.
    cell_count = 0
    total_count = 0
    for i, point in sample_points.enumerate_non_masked_points():
        # Log progress periodically.
        cell_count += 1
        if cell_count == 1000:
            total_count += cell_count
            cell_count = 0
            logging.info(" Processed {} points of {}".format(total_count, sample_points_count))

        if constraint is None:
            con_points = data_points
        else:
            con_points = constraint.constrain_points(point, data_points)

        try:
            value_obj = kernel.get_value(point, con_points)
            # Kernel returns either a single value or a tuple of values to insert into each output variable.
            if isinstance(value_obj, tuple):
                for idx, val in enumerate(value_obj):
                    if not np.isnan(val):
                        values[idx, i] = val
            else:
                values[0, i] = value_obj
        except CoordinateMultiDimError as e:
            raise NotImplementedError(e)
        except ValueError:
            pass

    log_memory_profile("GeneralUngriddedCollocator after running kernel on sample points")

    return_data = UngriddedDataList()
    for idx, var_details in enumerate(var_set_details):
        if idx == 0:
            new_data = UngriddedData(values[0, :], metadata, points.coords())
            new_data.metadata._name = var_details[0]
            new_data.metadata.long_name = var_details[1]
            cis.utils.set_cube_standard_name_if_valid(new_data, var_details[2])
            new_data.metadata.shape = (len(sample_points),)
            new_data.metadata.missing_value = self.fill_value
            # var_details[2] is the standard name (used just above); the units live at index 3,
            # as in the Metadata constructor below.
            new_data.units = var_details[3]
        else:
            var_metadata = Metadata(name=var_details[0], long_name=var_details[1], shape=(len(sample_points),),
                                    missing_value=self.fill_value, units=var_details[3])
            new_data = UngriddedData(values[idx, :], var_metadata, points.coords())
        return_data.append(new_data)

    log_memory_profile("GeneralUngriddedCollocator final")

    return return_data
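# --- Illustrative sketch (not part of the collocator) ------------------------
# The per-point loop above has to cope with kernels that return either a
# single value or a tuple of values (one per output variable). A minimal
# stand-in for that dispatch, with hypothetical kernel results and fill value:
import numpy as np

values = np.zeros((3, 2)) + (-999.0)   # (n_output_vars, n_sample_points), pre-filled

def store(value_obj, i):
    if isinstance(value_obj, tuple):
        for idx, val in enumerate(value_obj):
            if not np.isnan(val):
                values[idx, i] = val
    else:
        values[0, i] = value_obj

store(1.2, 0)                 # scalar kernel: only the first variable is set
store((2.0, np.nan, 4.0), 1)  # multi-value kernel: NaNs leave the fill value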