def collocate(self, points, data, constraint, kernel): """ This collocator takes a list of HyperPoints and a data object (currently either Ungridded data or a Cube) and returns one new LazyData object with the values as determined by the constraint and kernel objects. The metadata for the output LazyData object is copied from the input data object. :param UngriddedData or UngriddedCoordinates points: Objects defining the sample points :param GriddedData or GriddedDataList data: Data to resample :param constraint: An instance of a Constraint subclass which takes a data object and returns a subset of that data based on it's internal parameters :param kernel: An instance of a Kernel subclass which takes a number of points and returns a single value :return: A single LazyData object """ from cis.collocation.gridded_interpolation import GriddedUngriddedInterpolator log_memory_profile("GriddedUngriddedCollocator Initial") if isinstance(data, list): # Indexing and constraints (for SepConstraintKdTree) will only take place on the first iteration, # so we really can just call this method recursively if we've got a list of data. output = UngriddedDataList() for var in data: output.extend(self.collocate(points, var, constraint, kernel)) return output if constraint is not None and not isinstance(constraint, DummyConstraint): raise ValueError("A constraint cannot be specified for the GriddedUngriddedCollocator") data_points = data # First fix the sample points so that they all fall within the same 360 degree longitude range _fix_longitude_range(points.coords(), points) # Then fix the data points so that they fall onto the same 360 degree longitude range as the sample points _fix_longitude_range(points.coords(), data_points) log_memory_profile("GriddedUngriddedCollocator after data retrieval") logging.info("--> Collocating...") logging.info(" {} sample points".format(points.size)) if self.interpolator is None: # Cache the interpolator self.interpolator = GriddedUngriddedInterpolator(data, points, kernel, self.missing_data_for_missing_sample) values = self.interpolator(data, fill_value=self.fill_value, extrapolate=self.extrapolate) log_memory_profile("GriddedUngriddedCollocator after running kernel on sample points") metadata = Metadata(self.var_name or data.var_name, long_name=self.var_long_name or data.long_name, shape=values.shape, missing_value=self.fill_value, units=self.var_units or data.units) set_standard_name_if_valid(metadata, data.standard_name) return_data = UngriddedDataList([UngriddedData(values, metadata, points.coords())]) log_memory_profile("GriddedUngriddedCollocator final") return return_data
def test_combining(self):
    from cis.test.util.mock import make_regular_2d_ungridded_data
    another_list = UngriddedDataList([make_regular_2d_ungridded_data(), make_regular_2d_ungridded_data()])

    # Test adding
    assert_that(isinstance(self.ungridded_data_list + another_list, UngriddedDataList))

    # Test extending
    another_list.extend(self.ungridded_data_list)
    assert_that(isinstance(another_list, UngriddedDataList))
    assert_that(len(another_list) == 4)

    # Test can't add single items
    with assert_raises(TypeError):
        self.ungridded_data_list + another_list[0]
def collocate(self, points, data, constraint, kernel): """ This collocator takes a list of HyperPoints and a data object (currently either Ungridded data or a Cube) and returns one new LazyData object with the values as determined by the constraint and kernel objects. The metadata for the output LazyData object is copied from the input data object. :param UngriddedData or UngriddedCoordinates points: Object defining the sample points :param UngriddedData data: The source data to collocate from :param constraint: An instance of a Constraint subclass which takes a data object and returns a subset of that data based on it's internal parameters :param kernel: An instance of a Kernel subclass which takes a number of points and returns a single value :return UngriddedData or UngriddedDataList: Depending on the input """ log_memory_profile("GeneralUngriddedCollocator Initial") if isinstance(data, list): # Indexing and constraints (for SepConstraintKdTree) will only take place on the first iteration, # so we really can just call this method recursively if we've got a list of data. output = UngriddedDataList() for var in data: output.extend(self.collocate(points, var, constraint, kernel)) return output # First fix the sample points so that they all fall within the same 360 degree longitude range _fix_longitude_range(points.coords(), points) # Then fix the data points so that they fall onto the same 360 degree longitude range as the sample points _fix_longitude_range(points.coords(), data) # Convert to dataframes for fancy indexing sample_points = points.as_data_frame(time_index=False, name='vals') data_points = data.as_data_frame(time_index=False, name='vals').dropna(axis=0) log_memory_profile("GeneralUngriddedCollocator after data retrieval") # Create index if constraint and/or kernel require one. coord_map = None data_index.create_indexes(constraint, points, data_points, coord_map) log_memory_profile("GeneralUngriddedCollocator after indexing") logging.info("--> Collocating...") # Create output arrays. self.var_name = data.var_name self.var_long_name = data.long_name self.var_standard_name = data.standard_name self.var_units = data.units var_set_details = kernel.get_variable_details(self.var_name, self.var_long_name, self.var_standard_name, self.var_units) sample_points_count = len(sample_points) # Create an empty masked array to store the collocated values. The elements will be unmasked by assignment. values = np.ma.masked_all((len(var_set_details), sample_points_count)) values.fill_value = self.fill_value log_memory_profile("GeneralUngriddedCollocator after output array creation") logging.info(" {} sample points".format(sample_points_count)) # Apply constraint and/or kernel to each sample point. if isinstance(kernel, nn_horizontal_only): # Only find the nearest point using the kd-tree, without constraint in other dimensions nearest_points = data_points.iloc[constraint.haversine_distance_kd_tree_index.find_nearest_point(sample_points)] values[0, :] = nearest_points.vals.values else: for i, point, con_points in constraint.get_iterator(self.missing_data_for_missing_sample, None, None, data_points, None, sample_points, None): try: values[:, i] = kernel.get_value(point, con_points) # Kernel returns either a single value or a tuple of values to insert into each output variable. 
except CoordinateMultiDimError as e: raise NotImplementedError(e) except ValueError as e: pass log_memory_profile("GeneralUngriddedCollocator after running kernel on sample points") # Mask any bad values values = np.ma.masked_invalid(values) return_data = UngriddedDataList() for idx, var_details in enumerate(var_set_details): var_metadata = Metadata(name=var_details[0], long_name=var_details[1], shape=(len(sample_points),), missing_value=self.fill_value, units=var_details[3]) set_standard_name_if_valid(var_metadata, var_details[2]) return_data.append(UngriddedData(values[idx, :], var_metadata, points.coords())) log_memory_profile("GeneralUngriddedCollocator final") return return_data
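

# Illustrative sketch (not CIS code): the nn_horizontal_only branch above looks up the nearest data point to
# each sample point via a haversine-distance kd-tree. The standalone example below approximates that by
# converting lat/lon to 3-D unit vectors and querying scipy's cKDTree; chord distance on the unit sphere is
# monotonic in great-circle distance, so the nearest neighbour is the same. All inputs are made up.
import numpy as np
from scipy.spatial import cKDTree


def _latlon_to_unit_vectors(lats, lons):
    """Convert latitude/longitude (degrees) to 3-D Cartesian unit vectors."""
    lat_r, lon_r = np.radians(lats), np.radians(lons)
    return np.column_stack([np.cos(lat_r) * np.cos(lon_r),
                            np.cos(lat_r) * np.sin(lon_r),
                            np.sin(lat_r)])


def nearest_data_values(data_lats, data_lons, data_vals, sample_lats, sample_lons):
    """Return the data value closest (in great-circle distance) to each sample point."""
    tree = cKDTree(_latlon_to_unit_vectors(data_lats, data_lons))
    _, indices = tree.query(_latlon_to_unit_vectors(sample_lats, sample_lons))
    return np.asarray(data_vals)[indices]

# Example with hypothetical inputs:
#   nearest_data_values([0.0, 10.0, 20.0], [0.0, 5.0, 10.0], [1.5, 2.5, 3.5],
#                       [9.0, 19.5], [4.0, 10.2])   # -> array([2.5, 3.5])
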
def collocate(self, points, data, constraint, kernel): """ This collocator takes a list of HyperPoints and a data object (currently either Ungridded data or a Cube) and returns one new LazyData object with the values as determined by the constraint and kernel objects. The metadata for the output LazyData object is copied from the input data object. :param points: UngriddedData or UngriddedCoordinates defining the sample points :param data: An UngriddedData object or Cube, or any other object containing metadata that the constraint object can read. May also be a list of objects, in which case a list will be returned :param constraint: An instance of a Constraint subclass which takes a data object and returns a subset of that data based on it's internal parameters :param kernel: An instance of a Kernel subclass which takes a number of points and returns a single value :return: A single LazyData object """ log_memory_profile("GeneralUngriddedCollocator Initial") if isinstance(data, list): # Indexing and constraints (for SepConstraintKdTree) will only take place on the first iteration, # so we really can just call this method recursively if we've got a list of data. output = UngriddedDataList() for var in data: output.extend(self.collocate(points, var, constraint, kernel)) return output metadata = data.metadata sample_points = points.get_all_points() # Convert ungridded data to a list of points if kernel needs it. # Special case checks for kernels that use a cube - this could be done more elegantly. if isinstance(kernel, nn_gridded) or isinstance(kernel, li): if hasattr(kernel, "interpolator"): # If we have an interpolator on the kernel we need to reset it as it depends on the actual values # as well as the coordinates kernel.interpolator = None kernel.coord_names = [] if not isinstance(data, iris.cube.Cube): raise ValueError("Ungridded data cannot be used with kernel nn_gridded or li") if constraint is not None and not isinstance(constraint, DummyConstraint): raise ValueError("A constraint cannot be specified with kernel nn_gridded or li") data_points = data else: data_points = data.get_non_masked_points() # First fix the sample points so that they all fall within the same 360 degree longitude range _fix_longitude_range(points.coords(), sample_points) # Then fix the data points so that they fall onto the same 360 degree longitude range as the sample points _fix_longitude_range(points.coords(), data_points) log_memory_profile("GeneralUngriddedCollocator after data retrieval") # Create index if constraint and/or kernel require one. coord_map = None data_index.create_indexes(constraint, points, data_points, coord_map) data_index.create_indexes(kernel, points, data_points, coord_map) log_memory_profile("GeneralUngriddedCollocator after indexing") logging.info("--> Collocating...") # Create output arrays. self.var_name = data.name() self.var_long_name = metadata.long_name self.var_standard_name = metadata.standard_name self.var_units = data.units var_set_details = kernel.get_variable_details(self.var_name, self.var_long_name, self.var_standard_name, self.var_units) sample_points_count = len(sample_points) values = np.zeros((len(var_set_details), sample_points_count)) + self.fill_value log_memory_profile("GeneralUngriddedCollocator after output array creation") logging.info(" {} sample points".format(sample_points_count)) # Apply constraint and/or kernel to each sample point. cell_count = 0 total_count = 0 for i, point in sample_points.enumerate_non_masked_points(): # Log progress periodically. 
cell_count += 1 if cell_count == 1000: total_count += cell_count cell_count = 0 logging.info(" Processed {} points of {}".format(total_count, sample_points_count)) if constraint is None: con_points = data_points else: con_points = constraint.constrain_points(point, data_points) try: value_obj = kernel.get_value(point, con_points) # Kernel returns either a single value or a tuple of values to insert into each output variable. if isinstance(value_obj, tuple): for idx, val in enumerate(value_obj): if not np.isnan(val): values[idx, i] = val else: values[0, i] = value_obj except CoordinateMultiDimError as e: raise NotImplementedError(e) except ValueError as e: pass log_memory_profile("GeneralUngriddedCollocator after running kernel on sample points") return_data = UngriddedDataList() for idx, var_details in enumerate(var_set_details): if idx == 0: new_data = UngriddedData(values[0, :], metadata, points.coords()) new_data.metadata._name = var_details[0] new_data.metadata.long_name = var_details[1] cis.utils.set_cube_standard_name_if_valid(new_data, var_details[2]) new_data.metadata.shape = (len(sample_points),) new_data.metadata.missing_value = self.fill_value new_data.units = var_details[2] else: var_metadata = Metadata(name=var_details[0], long_name=var_details[1], shape=(len(sample_points),), missing_value=self.fill_value, units=var_details[2]) new_data = UngriddedData(values[idx, :], var_metadata, points.coords()) return_data.append(new_data) log_memory_profile("GeneralUngriddedCollocator final") return return_data
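

# Illustrative sketch (not CIS code): both collocate() implementations above rely on the same kernel contract -
# get_variable_details() declares one (name, long_name, standard_name, units) tuple per output variable, and
# get_value() returns a scalar, or a tuple with one entry per declared variable, for a given sample point and its
# constrained data points; a ValueError is swallowed by the caller, leaving the fill value in place. The toy
# kernel below (a hypothetical mean-plus-count kernel, not a real CIS class) shows that shape of interface,
# assuming the constrained points expose a .vals array.
import numpy as np


class ExampleMeanCountKernel(object):
    """Toy two-output kernel: the mean of the constrained values and how many values went into it."""

    def get_variable_details(self, var_name, var_long_name, var_standard_name, var_units):
        # One detail tuple per output variable: (name, long_name, standard_name, units)
        return ((var_name, var_long_name, var_standard_name, var_units),
                (var_name + "_num_points", "Number of points used", None, "1"))

    def get_value(self, point, con_points):
        vals = np.asarray(con_points.vals, dtype=float)
        if vals.size == 0:
            # Skipped by the caller, leaving the fill value for this sample point
            raise ValueError("No points within the constraint")
        return float(np.mean(vals)), float(vals.size)
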