Example #1
0
    def read_data_list(self, filenames, variables, product=None, aliases=None):
        """
        Read multiple data objects. Files can be either gridded or ungridded but not a mix of both.

        :param filenames: One or more filenames of the files to read
        :type filenames: string or list
        :param variables: One or more variables to read from the files
        :type variables: string or list
        :param str product: Name of data product to use (optional)
        :param aliases: List of variable aliases to put on each variables
         data object as an alternative means of identifying them. (Optional)
        :return:  A list of the data read out (either a GriddedDataList or UngriddedDataList depending on the
         type of data contained in the files)
        """
        # if filenames or variables are not lists, make them lists of 1 element
        filenames = listify(filenames)
        variables = listify(variables)
        aliases = listify(aliases) if aliases else None

        variables = self._expand_wildcards(variables, filenames, product)

        data_list = None
        for idx, variable in enumerate(variables):
            var_data = self._get_data_func(filenames, variable, product)
            var_data.filenames = filenames
            if aliases:
                try:
                    var_data.alias = aliases[idx]
                except IndexError:
                    raise ValueError("Number of aliases does not match number of variables")
            if data_list is None:
                data_list = GriddedDataList() if var_data.is_gridded else UngriddedDataList()
            data_list.append(var_data)
        assert data_list is not None
        return data_list
Example #2
0
    def collocate(self, points, data, constraint, kernel):
        """
        :param points: cube defining the sample points
        :param data: CommonData object providing data to be collocated (or list of Data)
        :param constraint: instance of a Constraint subclass, which takes a data object and returns a subset of that
                           data based on it's internal parameters
        :param kernel: instance of a Kernel subclass which takes a number of points and returns a single value
        :return: GriddedDataList of collocated data
        """
        log_memory_profile("GeneralGriddedCollocator Initial")
        if isinstance(data, list):
            # If data is a list then call this method recursively over each element
            output_list = []
            for variable in data:
                collocated = self.collocate(points, variable, constraint, kernel)
                output_list.extend(collocated)
            return GriddedDataList(output_list)

        data_points = data.get_non_masked_points()

        log_memory_profile("GeneralGriddedCollocator Created data hyperpoint list view")

        # Work out how to iterate over the cube and map HyperPoint coordinates to cube coordinates.
        coord_map = make_coord_map(points, data)
        if self.missing_data_for_missing_sample and len(coord_map) is not len(points.coords()):
            raise cis.exceptions.UserPrintableException(
                "A sample variable has been specified but not all coordinates in the data appear in the sample so "
                "there are multiple points in the sample data so whether the data is missing or not can not be "
                "determined")

        coords = points.coords()
        shape = []
        output_coords = []

        # Find shape of coordinates to be iterated over.
        for (hpi, ci, shi) in coord_map:
            coord = coords[ci]
            if coord.ndim > 1:
                raise NotImplementedError("Co-location of data onto a cube with a coordinate of dimension greater"
                                          " than one is not supported (coordinate %s)", coord.name())
            # Ensure that bounds exist.
            if not coord.has_bounds():
                logging.warning("Creating guessed bounds as none exist in file")
                coord.guess_bounds()
            shape.append(coord.shape[0])
            output_coords.append(coord)

        _fix_longitude_range(coords, data_points)

        log_memory_profile("GeneralGriddedCollocator Created output coord map")

        # Create index if constraint supports it.
        data_index.create_indexes(constraint, coords, data_points, coord_map)
        data_index.create_indexes(kernel, points, data_points, coord_map)

        log_memory_profile("GeneralGriddedCollocator Created indexes")

        # Initialise output array as initially all masked, and set the appropriate fill value.
        values = []
        for i in range(kernel.return_size):
            val = np.ma.zeros(shape)
            val.mask = True
            val.fill_value = self.fill_value
            values.append(val)

        if kernel.return_size == 1:
            set_value_kernel = self._set_single_value_kernel
        else:
            set_value_kernel = self._set_multi_value_kernel

        logging.info("--> Co-locating...")

        if hasattr(kernel, "get_value_for_data_only") and hasattr(constraint, "get_iterator_for_data_only"):
            # Iterate over constrained cells
            iterator = constraint.get_iterator_for_data_only(
                self.missing_data_for_missing_sample, coord_map, coords, data_points, shape, points, values)
            for out_indices, data_values in iterator:
                try:
                    kernel_val = kernel.get_value_for_data_only(data_values)
                    set_value_kernel(kernel_val, values, out_indices)
                except ValueError:
                    # ValueErrors are raised by Kernel when there are no points to operate on.
                    # We don't need to do anything.
                    pass
        else:
            # Iterate over constrained cells
            iterator = constraint.get_iterator(
                self.missing_data_for_missing_sample, coord_map, coords, data_points, shape, points, values)
            for out_indices, hp, con_points in iterator:
                try:
                    kernel_val = kernel.get_value(hp, con_points)
                    set_value_kernel(kernel_val, values, out_indices)
                except ValueError:
                    # ValueErrors are raised by Kernel when there are no points to operate on.
                    # We don't need to do anything.
                    pass

        log_memory_profile("GeneralGriddedCollocator Completed collocation")

        # Construct an output cube containing the collocated data.
        kernel_var_details = kernel.get_variable_details(self.var_name or data.var_name,
                                                         self.var_long_name or data.long_name,
                                                         data.standard_name,
                                                         self.var_units or data.units)
        output = GriddedDataList([])
        for idx, val in enumerate(values):
            cube = self._create_collocated_cube(data, val, output_coords)
            data_with_nan_and_inf_removed = np.ma.masked_invalid(cube.data)
            data_with_nan_and_inf_removed.set_fill_value(self.fill_value)
            cube.data = data_with_nan_and_inf_removed
            cube.var_name = kernel_var_details[idx][0]
            cube.long_name = kernel_var_details[idx][1]
            set_standard_name_if_valid(cube, kernel_var_details[idx][2])
            try:
                cube.units = kernel_var_details[idx][3]
            except ValueError:
                logging.warn(
                    "Units are not cf compliant, not setting them. Units {}".format(kernel_var_details[idx][3]))

            # Sort the cube into the correct shape, so that the order of coordinates
            # is the same as in the source data
            coord_map = sorted(coord_map, key=lambda x: x[1])
            transpose_order = [coord[2] for coord in coord_map]
            cube.transpose(transpose_order)
            output.append(cube)

        log_memory_profile("GeneralGriddedCollocator Finished")

        return output
    def collocate(self, points, data, constraint, kernel):
        """
        :param points: cube defining the sample points
        :param data: CommonData object providing data to be collocated (or list of Data)
        :param constraint: instance of a Constraint subclass, which takes a data object and returns a subset of that
                           data based on it's internal parameters
        :param kernel: instance of a Kernel subclass which takes a number of points and returns a single value
        :return: GriddedDataList of collocated data
        """
        if isinstance(data, list):
            # If data is a list then call this method recursively over each element
            output_list = []
            for variable in data:
                collocated = self.collocate(points, variable, constraint, kernel)
                output_list.extend(collocated)
            return GriddedDataList(output_list)

        data_points = data.get_non_masked_points()

        # Work out how to iterate over the cube and map HyperPoint coordinates to cube coordinates.
        coord_map = make_coord_map(points, data)
        if self.missing_data_for_missing_sample and len(coord_map) is not len(points.coords()):
            raise cis.exceptions.UserPrintableException(
                "A sample variable has been specified but not all coordinates in the data appear in the sample so "
                "there are multiple points in the sample data so whether the data is missing or not can not be "
                "determined")

        coords = points.coords()
        shape = []
        output_coords = []

        # Find shape of coordinates to be iterated over.
        for (hpi, ci, shi) in coord_map:
            coord = coords[ci]
            if coord.ndim > 1:
                raise NotImplementedError("Co-location of data onto a cube with a coordinate of dimension greater"
                                          " than one is not supported (coordinate %s)", coord.name())
            # Ensure that bounds exist.
            if not coord.has_bounds():
                logging.warning("Creating guessed bounds as none exist in file")
                coord.guess_bounds()
            shape.append(coord.shape[0])
            output_coords.append(coord)

        _fix_longitude_range(coords, data_points)

        # Create index if constraint supports it.
        data_index.create_indexes(constraint, coords, data_points, coord_map)
        data_index.create_indexes(kernel, points, data_points, coord_map)

        # Initialise output array as initially all masked, and set the appropriate fill value.
        values = []
        for i in range(kernel.return_size):
            val = np.ma.zeros(shape)
            val.mask = True
            val.fill_value = self.fill_value
            values.append(val)

        if kernel.return_size == 1:
            set_value_kernel = self._set_single_value_kernel
        else:
            set_value_kernel = self._set_multi_value_kernel

        logging.info("--> Co-locating...")

        if hasattr(kernel, "get_value_for_data_only") and hasattr(constraint, "get_iterator_for_data_only"):
            # Iterate over constrained cells
            iterator = constraint.get_iterator_for_data_only(
                self.missing_data_for_missing_sample, coord_map, coords, data_points, shape, points, values)
            for out_indices, data_values in iterator:
                try:
                    kernel_val = kernel.get_value_for_data_only(data_values)
                    set_value_kernel(kernel_val, values, out_indices)
                except ValueError:
                    # ValueErrors are raised by Kernel when there are no points to operate on.
                    # We don't need to do anything.
                    pass
        else:
            # Iterate over constrained cells
            iterator = constraint.get_iterator(
                self.missing_data_for_missing_sample, coord_map, coords, data_points, shape, points, values)
            for out_indices, hp, con_points in iterator:
                try:
                    kernel_val = kernel.get_value(hp, con_points)
                    set_value_kernel(kernel_val, values, out_indices)
                except ValueError:
                    # ValueErrors are raised by Kernel when there are no points to operate on.
                    # We don't need to do anything.
                    pass

        # Construct an output cube containing the collocated data.
        kernel_var_details = kernel.get_variable_details(data.var_name, data.long_name, data.standard_name, data.units)
        output = GriddedDataList([])
        for idx, val in enumerate(values):
            cube = self._create_collocated_cube(data, val, output_coords)
            data_with_nan_and_inf_removed = np.ma.masked_invalid(cube.data)
            data_with_nan_and_inf_removed.set_fill_value(self.fill_value)
            cube.data = data_with_nan_and_inf_removed
            cube.var_name = kernel_var_details[idx][0]
            cube.long_name = kernel_var_details[idx][1]
            cis.utils.set_cube_standard_name_if_valid(cube, kernel_var_details[idx][2])
            try:
                cube.units = kernel_var_details[idx][3]
            except ValueError:
                logging.warn(
                    "Units are not cf compliant, not setting them. Units {}".format(kernel_var_details[idx][3]))

            # Sort the cube into the correct shape, so that the order of coordinates
            # is the same as in the source data
            coord_map = sorted(coord_map, key=lambda x: x[1])
            transpose_order = [coord[2] for coord in coord_map]
            cube.transpose(transpose_order)
            output.append(cube)

        return output