def read_data_list(self, filenames, variables, product=None, aliases=None):
    """
    Read multiple data objects. Files can be either gridded or ungridded but not a mix of both.

    :param filenames: One or more filenames of the files to read
    :type filenames: string or list
    :param variables: One or more variables to read from the files
    :type variables: string or list
    :param str product: Name of data product to use (optional)
    :param aliases: List of variable aliases to put on each variables data object as an alternative
     means of identifying them. (Optional)
    :return: A list of the data read out (either a GriddedDataList or UngriddedDataList depending on the
     type of data contained in the files)
    :raises ValueError: if there are fewer aliases than variables, or if there were no variables to read
    """
    # If filenames, variables or aliases are not lists, make them lists of one element.
    filenames = listify(filenames)
    variables = listify(variables)
    aliases = listify(aliases) if aliases else None

    variables = self._expand_wildcards(variables, filenames, product)

    data_list = None
    for idx, variable in enumerate(variables):
        var_data = self._get_data_func(filenames, variable, product)
        var_data.filenames = filenames
        if aliases:
            # Only the lookup is guarded, so that an unrelated IndexError is not misreported.
            try:
                alias = aliases[idx]
            except IndexError:
                raise ValueError("Number of aliases does not match number of variables")
            var_data.alias = alias
        if data_list is None:
            # The first variable read decides which kind of list is returned.
            data_list = GriddedDataList() if var_data.is_gridded else UngriddedDataList()
        data_list.append(var_data)

    # An explicit check rather than an assert: asserts are stripped when running with -O, which would
    # let None be returned to the caller.
    if data_list is None:
        raise ValueError("No variables were found to read")
    return data_list
def collocate(self, points, data, constraint, kernel):
    """
    Collocate data onto the coordinates of a sample cube.

    :param points: cube defining the sample points
    :param data: CommonData object providing data to be collocated (or list of Data)
    :param constraint: instance of a Constraint subclass, which takes a data object and returns a subset
     of that data based on its internal parameters
    :param kernel: instance of a Kernel subclass which takes a number of points and returns a single value
    :return: GriddedDataList of collocated data
    :raises cis.exceptions.UserPrintableException: if missing_data_for_missing_sample is set but the
     coordinate map does not cover every coordinate of the sample
    :raises NotImplementedError: if a mapped sample coordinate has more than one dimension
    """
    log_memory_profile("GeneralGriddedCollocator Initial")

    if isinstance(data, list):
        # If data is a list then call this method recursively over each element
        output_list = []
        for variable in data:
            output_list.extend(self.collocate(points, variable, constraint, kernel))
        return GriddedDataList(output_list)

    data_points = data.get_non_masked_points()

    log_memory_profile("GeneralGriddedCollocator Created data hyperpoint list view")

    # Work out how to iterate over the cube and map HyperPoint coordinates to cube coordinates.
    coord_map = make_coord_map(points, data)
    coords = points.coords()
    # Compare the lengths by value; an identity test on integers is only accidentally correct.
    if self.missing_data_for_missing_sample and len(coord_map) != len(coords):
        raise cis.exceptions.UserPrintableException(
            "A sample variable has been specified but not all coordinates in the data appear in the sample so "
            "there are multiple points in the sample data so whether the data is missing or not can not be "
            "determined")

    shape = []
    output_coords = []

    # Find shape of coordinates to be iterated over.
    for (_hpi, ci, _shi) in coord_map:
        coord = coords[ci]
        if coord.ndim > 1:
            raise NotImplementedError(
                ("Co-location of data onto a cube with a coordinate of dimension greater"
                 " than one is not supported (coordinate %s)") % coord.name())

        # Ensure that bounds exist.
        if not coord.has_bounds():
            logging.warning("Creating guessed bounds as none exist in file")
            coord.guess_bounds()

        shape.append(coord.shape[0])
        output_coords.append(coord)

    _fix_longitude_range(coords, data_points)

    log_memory_profile("GeneralGriddedCollocator Created output coord map")

    # Create index if constraint supports it.
    data_index.create_indexes(constraint, coords, data_points, coord_map)
    data_index.create_indexes(kernel, points, data_points, coord_map)

    log_memory_profile("GeneralGriddedCollocator Created indexes")

    # Initialise output array as initially all masked, and set the appropriate fill value.
    # There is one output array per value returned by the kernel.
    values = []
    for _ in range(kernel.return_size):
        val = np.ma.zeros(shape)
        val.mask = True
        val.fill_value = self.fill_value
        values.append(val)

    if kernel.return_size == 1:
        set_value_kernel = self._set_single_value_kernel
    else:
        set_value_kernel = self._set_multi_value_kernel

    logging.info("--> Co-locating...")

    if hasattr(kernel, "get_value_for_data_only") and hasattr(constraint, "get_iterator_for_data_only"):
        # Both kernel and constraint support working on the data values alone.
        # Iterate over constrained cells
        iterator = constraint.get_iterator_for_data_only(
            self.missing_data_for_missing_sample, coord_map, coords, data_points, shape, points, values)
        for out_indices, data_values in iterator:
            try:
                kernel_val = kernel.get_value_for_data_only(data_values)
                set_value_kernel(kernel_val, values, out_indices)
            except ValueError:
                # ValueErrors are raised by Kernel when there are no points to operate on.
                # We don't need to do anything: the output cell stays masked.
                pass
    else:
        # Iterate over constrained cells
        iterator = constraint.get_iterator(
            self.missing_data_for_missing_sample, coord_map, coords, data_points, shape, points, values)
        for out_indices, hp, con_points in iterator:
            try:
                kernel_val = kernel.get_value(hp, con_points)
                set_value_kernel(kernel_val, values, out_indices)
            except ValueError:
                # ValueErrors are raised by Kernel when there are no points to operate on.
                # We don't need to do anything: the output cell stays masked.
                pass

    log_memory_profile("GeneralGriddedCollocator Completed collocation")

    # Construct an output cube containing the collocated data. Names and units set on the collocator
    # take precedence over those of the source data.
    kernel_var_details = kernel.get_variable_details(self.var_name or data.var_name,
                                                     self.var_long_name or data.long_name,
                                                     data.standard_name,
                                                     self.var_units or data.units)

    # Sort the cube into the correct shape, so that the order of coordinates is the same as in the
    # source data. The ordering is the same for every output cube, so it is worked out once here.
    ordered_map = sorted(coord_map, key=lambda x: x[1])
    transpose_order = [entry[2] for entry in ordered_map]

    output = GriddedDataList([])
    for idx, val in enumerate(values):
        var_name, long_name, standard_name, units = kernel_var_details[idx][:4]

        cube = self._create_collocated_cube(data, val, output_coords)
        data_with_nan_and_inf_removed = np.ma.masked_invalid(cube.data)
        data_with_nan_and_inf_removed.set_fill_value(self.fill_value)
        cube.data = data_with_nan_and_inf_removed
        cube.var_name = var_name
        cube.long_name = long_name
        set_standard_name_if_valid(cube, standard_name)
        try:
            cube.units = units
        except ValueError:
            logging.warning(
                "Units are not cf compliant, not setting them. Units {}".format(units))

        cube.transpose(transpose_order)
        output.append(cube)

    log_memory_profile("GeneralGriddedCollocator Finished")
    return output
def collocate(self, points, data, constraint, kernel):
    """
    Collocate data onto the coordinates of a sample cube.

    :param points: cube defining the sample points
    :param data: CommonData object providing data to be collocated (or list of Data)
    :param constraint: instance of a Constraint subclass, which takes a data object and returns a subset
     of that data based on its internal parameters
    :param kernel: instance of a Kernel subclass which takes a number of points and returns a single value
    :return: GriddedDataList of collocated data
    :raises cis.exceptions.UserPrintableException: if missing_data_for_missing_sample is set but the
     coordinate map does not cover every coordinate of the sample
    :raises NotImplementedError: if a mapped sample coordinate has more than one dimension
    """
    if isinstance(data, list):
        # If data is a list then call this method recursively over each element
        output_list = []
        for variable in data:
            output_list.extend(self.collocate(points, variable, constraint, kernel))
        return GriddedDataList(output_list)

    data_points = data.get_non_masked_points()

    # Work out how to iterate over the cube and map HyperPoint coordinates to cube coordinates.
    coord_map = make_coord_map(points, data)
    coords = points.coords()
    # Compare the lengths by value; an identity test on integers is only accidentally correct.
    if self.missing_data_for_missing_sample and len(coord_map) != len(coords):
        raise cis.exceptions.UserPrintableException(
            "A sample variable has been specified but not all coordinates in the data appear in the sample so "
            "there are multiple points in the sample data so whether the data is missing or not can not be "
            "determined")

    shape = []
    output_coords = []

    # Find shape of coordinates to be iterated over.
    for (_hpi, ci, _shi) in coord_map:
        coord = coords[ci]
        if coord.ndim > 1:
            raise NotImplementedError(
                ("Co-location of data onto a cube with a coordinate of dimension greater"
                 " than one is not supported (coordinate %s)") % coord.name())

        # Ensure that bounds exist.
        if not coord.has_bounds():
            logging.warning("Creating guessed bounds as none exist in file")
            coord.guess_bounds()

        shape.append(coord.shape[0])
        output_coords.append(coord)

    _fix_longitude_range(coords, data_points)

    # Create index if constraint supports it.
    data_index.create_indexes(constraint, coords, data_points, coord_map)
    data_index.create_indexes(kernel, points, data_points, coord_map)

    # Initialise output array as initially all masked, and set the appropriate fill value.
    # There is one output array per value returned by the kernel.
    values = []
    for _ in range(kernel.return_size):
        val = np.ma.zeros(shape)
        val.mask = True
        val.fill_value = self.fill_value
        values.append(val)

    if kernel.return_size == 1:
        set_value_kernel = self._set_single_value_kernel
    else:
        set_value_kernel = self._set_multi_value_kernel

    logging.info("--> Co-locating...")

    if hasattr(kernel, "get_value_for_data_only") and hasattr(constraint, "get_iterator_for_data_only"):
        # Both kernel and constraint support working on the data values alone.
        # Iterate over constrained cells
        iterator = constraint.get_iterator_for_data_only(
            self.missing_data_for_missing_sample, coord_map, coords, data_points, shape, points, values)
        for out_indices, data_values in iterator:
            try:
                kernel_val = kernel.get_value_for_data_only(data_values)
                set_value_kernel(kernel_val, values, out_indices)
            except ValueError:
                # ValueErrors are raised by Kernel when there are no points to operate on.
                # We don't need to do anything: the output cell stays masked.
                pass
    else:
        # Iterate over constrained cells
        iterator = constraint.get_iterator(
            self.missing_data_for_missing_sample, coord_map, coords, data_points, shape, points, values)
        for out_indices, hp, con_points in iterator:
            try:
                kernel_val = kernel.get_value(hp, con_points)
                set_value_kernel(kernel_val, values, out_indices)
            except ValueError:
                # ValueErrors are raised by Kernel when there are no points to operate on.
                # We don't need to do anything: the output cell stays masked.
                pass

    # Construct an output cube containing the collocated data.
    kernel_var_details = kernel.get_variable_details(data.var_name, data.long_name, data.standard_name,
                                                     data.units)

    # Sort the cube into the correct shape, so that the order of coordinates is the same as in the
    # source data. The ordering is the same for every output cube, so it is worked out once here.
    ordered_map = sorted(coord_map, key=lambda x: x[1])
    transpose_order = [entry[2] for entry in ordered_map]

    output = GriddedDataList([])
    for idx, val in enumerate(values):
        var_name, long_name, standard_name, units = kernel_var_details[idx][:4]

        cube = self._create_collocated_cube(data, val, output_coords)
        data_with_nan_and_inf_removed = np.ma.masked_invalid(cube.data)
        data_with_nan_and_inf_removed.set_fill_value(self.fill_value)
        cube.data = data_with_nan_and_inf_removed
        cube.var_name = var_name
        cube.long_name = long_name
        cis.utils.set_cube_standard_name_if_valid(cube, standard_name)
        try:
            cube.units = units
        except ValueError:
            logging.warning(
                "Units are not cf compliant, not setting them. Units {}".format(units))

        cube.transpose(transpose_order)
        output.append(cube)

    return output