def test_complete_collapse_one_dim_using_moments_kernel(self):
        self.kernel = aggregation_kernels['moments']
        data1 = make_from_cube(make_5x3_lon_lat_2d_cube_with_missing_data())
        data1.var_name = 'var1'
        data2 = make_from_cube(make_5x3_lon_lat_2d_cube_with_missing_data())
        data2.var_name = 'var2'
        data2.data += 10
        data = GriddedDataList([data1, data2])

        output = data.collapsed(['x'], how=self.kernel)

        expect_mean = numpy.array([[5.5, 8.75, 9]])
        expect_stddev = numpy.array([numpy.sqrt(15), numpy.sqrt(26.25), numpy.sqrt(30)])
        expect_count = numpy.array([[4, 4, 4]])

        assert isinstance(output, GriddedDataList)
        assert len(output) == 6
        mean_1, stddev_1, count_1, mean_2, stddev_2, count_2 = output
        assert mean_1.var_name == 'var1'
        assert stddev_1.var_name == 'var1_std_dev'
        assert count_1.var_name == 'var1_num_points'
        assert mean_2.var_name == 'var2'
        assert stddev_2.var_name == 'var2_std_dev'
        assert count_2.var_name == 'var2_num_points'
        assert_arrays_almost_equal(mean_1.data, expect_mean)
        assert_arrays_almost_equal(mean_2.data, expect_mean + 10)
        assert_arrays_almost_equal(stddev_1.data, expect_stddev)
        assert_arrays_almost_equal(stddev_2.data, expect_stddev)
        assert_arrays_almost_equal(count_1.data, expect_count)
        assert_arrays_almost_equal(count_2.data, expect_count)
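
# Why the numbers above: the 'moments' kernel yields the mean, the unbiased
# (ddof=1) standard deviation and the count of non-masked points along the
# collapsed axis. A minimal sketch for the first column, assuming the mock
# cube's first longitude column holds [1, 4, 7, 10, 13] with the 13 masked
# (an assumption about the mock's layout, not taken from this page):
import numpy

col = numpy.ma.masked_array([1., 4., 7., 10., 13.], mask=[0, 0, 0, 0, 1])
assert col.count() == 4                                # var1_num_points
assert col.mean() == 5.5                               # var1 mean
assert numpy.isclose(col.std(ddof=1), numpy.sqrt(15))  # var1_std_dev
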
    def test_partial_aggregation_over_more_than_one_dim_on_multidimensional_coord(
            self):
        from cis.data_io.gridded_data import GriddedDataList, make_from_cube

        data1 = make_from_cube(
            make_mock_cube(time_dim_length=7, hybrid_pr_len=5))
        data2 = make_from_cube(
            make_mock_cube(time_dim_length=7, hybrid_pr_len=5, data_offset=1))
        datalist = GriddedDataList([data1, data2])

        cube_out = datalist.collapsed(['t', 'x'], how=self.kernel)

        result_data = numpy.array([[51.0, 52.0, 53.0, 54.0, 55.0],
                                   [156.0, 157.0, 158.0, 159.0, 160.0],
                                   [261.0, 262.0, 263.0, 264.0, 265.0],
                                   [366.0, 367.0, 368.0, 369.0, 370.0],
                                   [471.0, 472.0, 473.0, 474.0, 475.0]],
                                  dtype=numpy.float64)

        multidim_coord_points = numpy.array(
            [1000000., 3100000., 5200000., 7300000., 9400000.],
            dtype=numpy.float64)

        assert_arrays_almost_equal(cube_out[0].data, result_data)
        assert_arrays_almost_equal(cube_out[1].data, result_data + 1)
        assert_arrays_almost_equal(
            cube_out[0].coord('surface_air_pressure').points,
            multidim_coord_points)
        assert_arrays_almost_equal(
            cube_out[1].coord('surface_air_pressure').points,
            multidim_coord_points)
 def test_gridded_list_write_time_as_unlimited_dimension(self):
     data = GriddedDataList(
         [make_from_cube(make_mock_cube(time_dim_length=7))])
     data[0].var_name = 'rain'
     data.save_data(tmp_file)
     self.d = Dataset(tmp_file)
     assert self.d.dimensions['time'].isunlimited()
 def test_gridded_list_write_no_time_has_no_unlimited_dimension(self):
     data = GriddedDataList([make_from_cube(make_mock_cube())])
     data[0].var_name = 'rain'
     data.save_data(tmp_file)
     self.d = Dataset(tmp_file)
     for d in self.d.dimensions.values():
         assert not d.isunlimited()
    def aggregate_gridded(self, kernel):
        # Make sure all coordinates have bounds - important for weighting and aggregating
        # Only try to guess bounds on DimCoords
        for coord in self.data.coords(dim_coords=True):
            if not coord.has_bounds() and len(coord.points) > 1:
                coord.guess_bounds()
                logging.warning("Creating guessed bounds as none exist in file")
                new_coord_number = self.data.coord_dims(coord)
                self.data.remove_coord(coord.name())
                self.data.add_dim_coord(coord, new_coord_number)
        coords = []
        for coord in self.data.coords():
            grid, guessed_axis = self.get_grid(coord)

            if grid is not None:
                if isnan(grid.delta):
                    logging.info('Aggregating on ' + coord.name() + ', collapsing completely and using ' +
                                 kernel.cell_method + ' kernel.')
                    coords.append(coord)
                else:
                    raise NotImplementedError("Aggregation using partial collapse of "
                                              "coordinates is not supported for GriddedData")

        output = GriddedDataList([])
        if isinstance(kernel, MultiKernel):
            for sub_kernel in kernel.sub_kernels:
                sub_kernel_out = self._gridded_full_collapse(coords, sub_kernel)
                output.append_or_extend(sub_kernel_out)
        else:
            output.append_or_extend(self._gridded_full_collapse(coords, kernel))
        return output
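
# The guess_bounds() loop above matters because iris needs coordinate bounds
# to compute weights when collapsing. A standalone sketch of what
# guess_bounds() does, using only the plain iris API (nothing CIS-specific):
import numpy as np
import iris.coords

coord = iris.coords.DimCoord(np.array([0., 10., 20.]), long_name='x')
assert not coord.has_bounds()
coord.guess_bounds()  # invents contiguous bounds midway between points
assert coord.bounds.tolist() == [[-5., 5.], [5., 15.], [15., 25.]]
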
    def test_complete_collapse_two_dims_using_moments_kernel(self):
        self.kernel = aggregation_kernels['moments']
        data1 = make_from_cube(make_5x3_lon_lat_2d_cube_with_missing_data())
        data1.var_name = 'var1'
        data2 = make_from_cube(make_5x3_lon_lat_2d_cube_with_missing_data())
        data2.var_name = 'var2'
        data2.data += 10
        data = GriddedDataList([data1, data2])
        output = data.collapsed(['x', 'y'], how=self.kernel)

        expect_mean = numpy.array(7.75)
        expect_stddev = numpy.array(numpy.sqrt(244.25 / 11))
        expect_count = numpy.array(12)

        assert isinstance(output, GriddedDataList)
        assert len(output) == 6
        mean_1, stddev_1, count_1, mean_2, stddev_2, count_2 = output
        assert mean_1.var_name == 'var1'
        assert stddev_1.var_name == 'var1_std_dev'
        assert count_1.var_name == 'var1_num_points'
        assert mean_2.var_name == 'var2'
        assert stddev_2.var_name == 'var2_std_dev'
        assert count_2.var_name == 'var2_num_points'
        # Latitude area weighting means these aren't quite right so increase the rtol.
        assert numpy.allclose(mean_1.data, expect_mean, 1e-3)
        assert numpy.allclose(mean_2.data, expect_mean + 10, 1e-3)
        assert numpy.allclose(stddev_1.data, expect_stddev)
        assert numpy.allclose(stddev_2.data, expect_stddev)
        assert numpy.allclose(count_1.data, expect_count)
        assert numpy.allclose(count_2.data, expect_count)
    def read_data_list(self, filenames, variables, product=None, aliases=None):
        """
        Read multiple data objects. Files can be either gridded or ungridded but not a mix of both.

        :param filenames: One or more filenames of the files to read
        :type filenames: string or list
        :param variables: One or more variables to read from the files
        :type variables: string or list
        :param str product: Name of data product to use (optional)
        :param aliases: List of variable aliases to attach to each variable's
         data object as an alternative means of identifying them. (Optional)
        :return:  A list of the data read out (either a GriddedDataList or UngriddedDataList depending on the
         type of data contained in the files)
        """
        # if filenames or variables are not lists, make them lists of 1 element
        filenames = listify(filenames)
        variables = listify(variables)
        aliases = listify(aliases) if aliases else None

        variables = self._expand_wildcards(variables, filenames, product)

        data_list = None
        for idx, variable in enumerate(variables):
            var_data = self._get_data_func(filenames, variable, product)
            var_data.filenames = filenames
            if aliases:
                try:
                    var_data.alias = aliases[idx]
                except IndexError:
                    raise ValueError("Number of aliases does not match number of variables")
            if data_list is None:
                data_list = GriddedDataList() if var_data.is_gridded else UngriddedDataList()
            data_list.append(var_data)
        assert data_list is not None
        return data_list
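
# A hypothetical usage sketch for read_data_list; the filenames and variable
# names are invented, and the import path is assumed to be CIS's
# cis.data_io.data_reader:
from cis.data_io.data_reader import DataReader

reader = DataReader()
data_list = reader.read_data_list(['model_output.nc'],
                                  ['temperature', 'pressure'],
                                  aliases=['T', 'P'])
assert len(data_list) == 2 and data_list[0].alias == 'T'
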
def stats_cmd(main_arguments):
    """
    Main routine for handling calls to the statistics command.

    :param main_arguments: The command line arguments (minus the stats command)
    """
    from cis.stats import StatsAnalyzer
    from cis.data_io.gridded_data import GriddedDataList
    data_reader = DataReader()
    data_list = data_reader.read_datagroups(main_arguments.datagroups)
    analyzer = StatsAnalyzer(*data_list)
    results = analyzer.analyze()
    header = "RESULTS OF STATISTICAL COMPARISON:"
    note = "Compared all points which have non-missing values in both variables"
    header_length = max(len(header), len(note))
    print(header_length * '=')
    print(header)
    print(header_length * '-')
    print(note)
    print(header_length * '=')
    for result in results:
        print(result.pprint())
    if main_arguments.output:
        cubes = GriddedDataList([result.as_cube() for result in results])
        variables = []
        filenames = []
        for datagroup in main_arguments.datagroups:
            variables.extend(datagroup['variables'])
            filenames.extend(datagroup['filenames'])
        history = "Statistical comparison performed using CIS version " + __version__ + \
                  "\n variables: " + str(variables) + \
                  "\n from files: " + str(set(filenames))
        cubes.add_history(history)
        cubes.save_data(main_arguments.output)
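
# stats_cmd only reads main_arguments.datagroups and main_arguments.output,
# so a namespace shaped the way the code above expects could look like this
# (illustrative only; the field values are invented):
from argparse import Namespace

main_arguments = Namespace(
    datagroups=[{'variables': ['var1', 'var2'],  # keys read in the output loop
                 'filenames': ['file_a.nc']}],
    output=None)  # set to an output path to save the result cubes
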
 def test_empty_longitude_subset_of_gridded_data_list_returns_no_data(self):
     """
     Checks the convention of returning None if subsetting results in an empty subset.
     Longitude has a modulus and so uses the IRIS intersection method
     """
     data = GriddedDataList([cis.test.util.mock.make_square_5x3_2d_cube()])
     subset = data.subset(longitude=[1.0, 3.0])
     assert (subset is None)
 def test_empty_time_subset_of_gridded_data_list_returns_no_data(self):
     """
     Checks the convention of returning None if subsetting results in an empty subset.
     Time has no modulus and so uses the IRIS extract method
     """
     data = GriddedDataList([cis.test.util.mock.make_square_5x3_2d_cube_with_time()])
     subset = data.subset(time=[140500, 140550])
     assert (subset is None)
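
# Both docstrings above hinge on whether the coordinate's units carry a
# modulus: degrees do (360), time units do not, and that is what steers the
# subsetting between the iris intersection and extract code paths. A quick
# check, assuming the cf_units package that iris itself uses:
from cf_units import Unit

assert Unit('degrees').modulus == 360.0
assert Unit('days since 2000-01-01').modulus is None
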
 def test_GIVEN_GriddedDataList_WHEN_constrain_THEN_correctly_subsetted_GriddedDataList_returned(self):
     gridded1 = cis.test.util.mock.make_square_5x3_2d_cube()
     gridded2 = cis.test.util.mock.make_square_5x3_2d_cube()
     datalist = GriddedDataList([gridded1, gridded2])
     subset = datalist.subset(longitude=[0.0, 5.0], latitude=[-5.0, 5.0])
     assert isinstance(subset, GriddedDataList)
     assert (subset[0].data.tolist() == [[5, 6], [8, 9], [11, 12]])
     assert (subset[1].data.tolist() == [[5, 6], [8, 9], [11, 12]])
    def test_taylor_diagram_gridded(self):
        from cis.test.util.mock import make_mock_cube
        from cis.data_io.gridded_data import GriddedDataList

        d = GriddedDataList([make_mock_cube(), make_mock_cube(data_offset=2)])
        d[0].var_name = 'snow'
        d[1].var_name = 'rain'

        d.plot(how='taylor')

        self.check_graphic()
    def test_iris_comparative_scatter(self):
        from cis.test.util.mock import make_mock_cube
        from cis.data_io.gridded_data import GriddedDataList

        d = GriddedDataList([make_mock_cube(), make_mock_cube(data_offset=2)])
        d[0].var_name = 'snow'
        d[1].var_name = 'rain'

        d.plot(how='comparativescatter')

        self.check_graphic()
 def test_empty_time_subset_of_gridded_data_list_returns_no_data(self):
     """
     Checks the convention of returning None if subsetting results in an empty subset.
     Time has no modulus and so uses the IRIS extract method
     """
     data = GriddedDataList([cis.test.util.mock.make_square_5x3_2d_cube_with_time()])
     long_coord = data.coord('time')
     constraint = subset_constraint.GriddedSubsetConstraint()
     constraint.set_limit(long_coord, 140500, 140550)
     subset = constraint.constrain(data)
     assert (subset is None)
    def test_iris_multiple_scatter(self):
        from cis.test.util.mock import make_mock_cube
        from cis.data_io.gridded_data import GriddedDataList

        # This only works with one dimensional gridded data
        d = GriddedDataList([make_mock_cube(lat_dim_length=0), make_mock_cube(lat_dim_length=0, data_offset=2)])
        d[0].var_name = 'snow'
        d[1].var_name = 'rain'

        # Will default to line plots
        d.plot()

        self.check_graphic()
    def test_aggregate_mean(self):
        from cis.data_io.gridded_data import GriddedDataList, make_from_cube

        data1 = make_from_cube(make_mock_cube())
        data2 = make_from_cube(make_mock_cube(data_offset=1))
        datalist = GriddedDataList([data1, data2])
        cube_out = datalist.collapsed(['y'], how=self.kernel)

        result1 = numpy.array([7, 8, 9])
        result2 = result1 + 1

        assert isinstance(cube_out, GriddedDataList)

        # There is a small deviation due to the weighting correction applied by Iris when completely collapsing
        assert_arrays_almost_equal(result1, cube_out[0].data)
        assert_arrays_almost_equal(result2, cube_out[1].data)
    def test_collapse_vertical_coordinate(self):
        from cis.data_io.gridded_data import GriddedDataList, make_from_cube

        data1 = make_from_cube(make_mock_cube(alt_dim_length=6))
        data2 = make_from_cube(make_mock_cube(alt_dim_length=6, data_offset=1))
        datalist = GriddedDataList([data1, data2])
        cube_out = datalist.collapsed(['z'], how=self.kernel)

        result1 = data1.data.mean(axis=2)
        result2 = result1 + 1

        assert isinstance(cube_out, GriddedDataList)

        # There is a small deviation due to the weighting correction applied by Iris when completely collapsing
        assert_arrays_almost_equal(result1, cube_out[0].data)
        assert_arrays_almost_equal(result2, cube_out[1].data)
        assert numpy.array_equal(data1.coords('latitude')[0].points, cube_out.coords('latitude')[0].points)
 def _make_two_gridded(self):
     data1 = make_from_cube(mock.make_mock_cube())
     data2 = make_from_cube(mock.make_mock_cube(data_offset=10))
     data1.var_name = 'var1'
     data2.var_name = 'var2'
     data1.filenames = ['filename1']
     data2.filenames = ['filename2']
     self.data = GriddedDataList([data1, data2])
    def test_collapse_vertical_coordinate_weighted_aggregator(self):
        """
        We use a weighted aggregator, though no weights should be applied since we're only summing over the vertical
        """
        from cis.data_io.gridded_data import GriddedDataList, make_from_cube

        data1 = make_from_cube(make_mock_cube(alt_dim_length=6))
        data2 = make_from_cube(make_mock_cube(alt_dim_length=6, data_offset=1))
        datalist = GriddedDataList([data1, data2])
        cube_out = datalist.collapsed(['z'], how=iris.analysis.SUM)

        result1 = np.sum(data1.data, axis=2)
        result2 = np.sum(data2.data, axis=2)

        assert isinstance(cube_out, GriddedDataList)

        # There is a small deviation due to the weighting correction applied by Iris when completely collapsing
        assert_arrays_almost_equal(result1, cube_out[0].data)
        assert_arrays_almost_equal(result2, cube_out[1].data)
        assert numpy.array_equal(data1.coords('latitude')[0].points, cube_out.coords('latitude')[0].points)
    def __call__(self, kernel):
        from cis.data_io.gridded_data import GriddedDataList
        from cis.aggregation.collapse_kernels import MultiKernel

        # Make sure all coordinates have bounds - important for weighting and aggregating
        # Only try to guess bounds on DimCoords
        for coord in self.data.coords(dim_coords=True):
            if not coord.has_bounds() and len(coord.points) > 1:
                coord.guess_bounds()
                logging.warning("Creating guessed bounds as none exist in file")
                new_coord_number = self.data.coord_dims(coord)
                self.data.remove_coord(coord.name())
                self.data.add_dim_coord(coord, new_coord_number)

        output = GriddedDataList([])
        if isinstance(kernel, MultiKernel):
            for sub_kernel in kernel.sub_kernels:
                sub_kernel_out = self._gridded_full_collapse(sub_kernel)
                output.append_or_extend(sub_kernel_out)
        else:
            output.append_or_extend(self._gridded_full_collapse(kernel))
        return output
    def collocate(self, points, data, constraint, kernel):
        """
        :param points: cube defining the sample points
        :param data: CommonData object providing data to be collocated (or list of Data)
        :param constraint: instance of a Constraint subclass, which takes a data object and returns a subset of that
                           data based on its internal parameters
        :param kernel: instance of a Kernel subclass which takes a number of points and returns a single value
        :return: GriddedDataList of collocated data
        """
        if isinstance(data, list):
            # If data is a list then call this method recursively over each element
            output_list = []
            for variable in data:
                collocated = self.collocate(points, variable, constraint, kernel)
                output_list.extend(collocated)
            return GriddedDataList(output_list)

        data_points = data.get_non_masked_points()

        # Work out how to iterate over the cube and map HyperPoint coordinates to cube coordinates.
        coord_map = make_coord_map(points, data)
        if self.missing_data_for_missing_sample and len(coord_map) != len(points.coords()):
            raise cis.exceptions.UserPrintableException(
                "A sample variable has been specified but not all coordinates in the data appear in the sample, "
                "so there are multiple points in the sample data and whether the data is missing or not "
                "cannot be determined")

        coords = points.coords()
        shape = []
        output_coords = []

        # Find shape of coordinates to be iterated over.
        for (hpi, ci, shi) in coord_map:
            coord = coords[ci]
            if coord.ndim > 1:
                raise NotImplementedError("Co-location of data onto a cube with a coordinate of dimension greater"
                                          " than one is not supported (coordinate %s)" % coord.name())
            # Ensure that bounds exist.
            if not coord.has_bounds():
                logging.warning("Creating guessed bounds as none exist in file")
                coord.guess_bounds()
            shape.append(coord.shape[0])
            output_coords.append(coord)

        _fix_longitude_range(coords, data_points)

        # Create index if constraint supports it.
        data_index.create_indexes(constraint, coords, data_points, coord_map)
        data_index.create_indexes(kernel, points, data_points, coord_map)

        # Initialise output array as initially all masked, and set the appropriate fill value.
        values = []
        for i in range(kernel.return_size):
            val = np.ma.zeros(shape)
            val.mask = True
            val.fill_value = self.fill_value
            values.append(val)

        if kernel.return_size == 1:
            set_value_kernel = self._set_single_value_kernel
        else:
            set_value_kernel = self._set_multi_value_kernel

        logging.info("--> Co-locating...")

        if hasattr(kernel, "get_value_for_data_only") and hasattr(constraint, "get_iterator_for_data_only"):
            # Iterate over constrained cells
            iterator = constraint.get_iterator_for_data_only(
                self.missing_data_for_missing_sample, coord_map, coords, data_points, shape, points, values)
            for out_indices, data_values in iterator:
                try:
                    kernel_val = kernel.get_value_for_data_only(data_values)
                    set_value_kernel(kernel_val, values, out_indices)
                except ValueError:
                    # ValueErrors are raised by Kernel when there are no points to operate on.
                    # We don't need to do anything.
                    pass
        else:
            # Iterate over constrained cells
            iterator = constraint.get_iterator(
                self.missing_data_for_missing_sample, coord_map, coords, data_points, shape, points, values)
            for out_indices, hp, con_points in iterator:
                try:
                    kernel_val = kernel.get_value(hp, con_points)
                    set_value_kernel(kernel_val, values, out_indices)
                except ValueError:
                    # ValueErrors are raised by Kernel when there are no points to operate on.
                    # We don't need to do anything.
                    pass

        # Construct an output cube containing the collocated data.
        kernel_var_details = kernel.get_variable_details(data.var_name, data.long_name, data.standard_name, data.units)
        output = GriddedDataList([])
        for idx, val in enumerate(values):
            cube = self._create_collocated_cube(data, val, output_coords)
            data_with_nan_and_inf_removed = np.ma.masked_invalid(cube.data)
            data_with_nan_and_inf_removed.set_fill_value(self.fill_value)
            cube.data = data_with_nan_and_inf_removed
            cube.var_name = kernel_var_details[idx][0]
            cube.long_name = kernel_var_details[idx][1]
            cis.utils.set_cube_standard_name_if_valid(cube, kernel_var_details[idx][2])
            try:
                cube.units = kernel_var_details[idx][3]
            except ValueError:
                logging.warning(
                    "Units are not cf compliant, not setting them. Units {}".format(kernel_var_details[idx][3]))

            # Sort the cube into the correct shape, so that the order of coordinates
            # is the same as in the source data
            coord_map = sorted(coord_map, key=lambda x: x[1])
            transpose_order = [coord[2] for coord in coord_map]
            cube.transpose(transpose_order)
            output.append(cube)

        return output
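
# The output initialisation in collocate() uses a numpy.ma idiom worth
# spelling out: the array starts fully masked, and assigning a value unmasks
# only that cell, so untouched cells stay masked. A self-contained sketch:
import numpy as np

val = np.ma.zeros((2, 3))
val.mask = True          # everything starts masked
val.fill_value = -999.0  # what masked cells become on filled()
val[0, 1] = 42.0         # assignment unmasks just this cell
assert val.filled().tolist() == [[-999.0, 42.0, -999.0],
                                 [-999.0, -999.0, -999.0]]
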
    def collocate(self, points, data, constraint, kernel):
        """
        :param points: cube defining the sample points
        :param data: CommonData object providing data to be collocated (or list of Data)
        :param constraint: instance of a Constraint subclass, which takes a data object and returns a subset of that
                           data based on its internal parameters
        :param kernel: instance of a Kernel subclass which takes a number of points and returns a single value
        :return: GriddedDataList of collocated data
        """
        log_memory_profile("GeneralGriddedCollocator Initial")
        if isinstance(data, list):
            # If data is a list then call this method recursively over each element
            output_list = []
            for variable in data:
                collocated = self.collocate(points, variable, constraint, kernel)
                output_list.extend(collocated)
            return GriddedDataList(output_list)

        data_points = data.get_non_masked_points()

        log_memory_profile("GeneralGriddedCollocator Created data hyperpoint list view")

        # Work out how to iterate over the cube and map HyperPoint coordinates to cube coordinates.
        coord_map = make_coord_map(points, data)
        if self.missing_data_for_missing_sample and len(coord_map) != len(points.coords()):
            raise cis.exceptions.UserPrintableException(
                "A sample variable has been specified but not all coordinates in the data appear in the sample, "
                "so there are multiple points in the sample data and whether the data is missing or not "
                "cannot be determined")

        coords = points.coords()
        shape = []
        output_coords = []

        # Find shape of coordinates to be iterated over.
        for (hpi, ci, shi) in coord_map:
            coord = coords[ci]
            if coord.ndim > 1:
                raise NotImplementedError("Co-location of data onto a cube with a coordinate of dimension greater"
                                          " than one is not supported (coordinate %s)" % coord.name())
            # Ensure that bounds exist.
            if not coord.has_bounds():
                logging.warning("Creating guessed bounds as none exist in file")
                coord.guess_bounds()
            shape.append(coord.shape[0])
            output_coords.append(coord)

        _fix_longitude_range(coords, data_points)

        log_memory_profile("GeneralGriddedCollocator Created output coord map")

        # Create index if constraint supports it.
        data_index.create_indexes(constraint, coords, data_points, coord_map)
        data_index.create_indexes(kernel, points, data_points, coord_map)

        log_memory_profile("GeneralGriddedCollocator Created indexes")

        # Initialise output array as initially all masked, and set the appropriate fill value.
        values = []
        for i in range(kernel.return_size):
            val = np.ma.zeros(shape)
            val.mask = True
            val.fill_value = self.fill_value
            values.append(val)

        if kernel.return_size == 1:
            set_value_kernel = self._set_single_value_kernel
        else:
            set_value_kernel = self._set_multi_value_kernel

        logging.info("--> Co-locating...")

        if hasattr(kernel, "get_value_for_data_only") and hasattr(constraint, "get_iterator_for_data_only"):
            # Iterate over constrained cells
            iterator = constraint.get_iterator_for_data_only(
                self.missing_data_for_missing_sample, coord_map, coords, data_points, shape, points, values)
            for out_indices, data_values in iterator:
                try:
                    kernel_val = kernel.get_value_for_data_only(data_values)
                    set_value_kernel(kernel_val, values, out_indices)
                except ValueError:
                    # ValueErrors are raised by Kernel when there are no points to operate on.
                    # We don't need to do anything.
                    pass
        else:
            # Iterate over constrained cells
            iterator = constraint.get_iterator(
                self.missing_data_for_missing_sample, coord_map, coords, data_points, shape, points, values)
            for out_indices, hp, con_points in iterator:
                try:
                    kernel_val = kernel.get_value(hp, con_points)
                    set_value_kernel(kernel_val, values, out_indices)
                except ValueError:
                    # ValueErrors are raised by Kernel when there are no points to operate on.
                    # We don't need to do anything.
                    pass

        log_memory_profile("GeneralGriddedCollocator Completed collocation")

        # Construct an output cube containing the collocated data.
        kernel_var_details = kernel.get_variable_details(self.var_name or data.var_name,
                                                         self.var_long_name or data.long_name,
                                                         data.standard_name,
                                                         self.var_units or data.units)
        output = GriddedDataList([])
        for idx, val in enumerate(values):
            cube = self._create_collocated_cube(data, val, output_coords)
            data_with_nan_and_inf_removed = np.ma.masked_invalid(cube.data)
            data_with_nan_and_inf_removed.set_fill_value(self.fill_value)
            cube.data = data_with_nan_and_inf_removed
            cube.var_name = kernel_var_details[idx][0]
            cube.long_name = kernel_var_details[idx][1]
            set_standard_name_if_valid(cube, kernel_var_details[idx][2])
            try:
                cube.units = kernel_var_details[idx][3]
            except ValueError:
                logging.warning(
                    "Units are not cf compliant, not setting them. Units {}".format(kernel_var_details[idx][3]))

            # Sort the cube into the correct shape, so that the order of coordinates
            # is the same as in the source data
            coord_map = sorted(coord_map, key=lambda x: x[1])
            transpose_order = [coord[2] for coord in coord_map]
            cube.transpose(transpose_order)
            output.append(cube)

        log_memory_profile("GeneralGriddedCollocator Finished")

        return output
 def setUp(self):
     self.calc = Calculator()
     self.data = GriddedDataList([make_from_cube(mock.make_mock_cube())])
     self.data[0].var_name = 'var_name'
    def collocate(self, points, data, constraint, kernel):
        """
        This collocator takes two Iris cubes, and collocates from the data cube onto the grid of the 'points' cube. The
        collocator then returns another Iris cube.
        :param points: An Iris cube with the sampling grid to collocate onto.
        :param data: The Iris cube with the data to be collocated.
        :param constraint: Only None is currently allowed, as a constraint is unlikely to be required for
                           gridded-gridded collocation.
        :param kernel: The kernel to use, current options are gridded_gridded_nn and gridded_gridded_li.
        :return: An Iris cube with the collocated data.
        """
        self._check_for_valid_kernel(kernel)

        # Force the data longitude range to be the same as that of the sample grid.
        _fix_longitude_range(points.coords(), data)

        # Initialise variables used to create an output mask based on the sample data mask.
        sample_coord_lookup = {}  # Maps coordinate in sample data -> location in dimension order
        for idx, coord in enumerate(points.coords()):
            sample_coord_lookup[coord] = idx
        sample_coord_transpose_map = []  # For coords in both sample and data, contains the position in the sample
        other_coord_transpose_map = []  # For coords in data but not in sample, records that coord's position in data.
        repeat_size = 1
        output_mask = np.ma.nomask

        # Make a list of the coordinates we have, with each entry holding the name of a coordinate and the
        # number of points along its axis. One list is for the sample grid, which contains the points we
        # interpolate to, and one is for the output grid, which will additionally contain any dimensions
        # missing from the sample grid.
        coord_names_and_sizes_for_sample_grid = []
        coord_names_and_sizes_for_output_grid = []
        for idx, coord in enumerate(data.coords(dim_coords=True)):
            # First try to find the coordinate in points, the sample grid. If an IndexError is raised, the name
            # does not appear in the sample grid, so instead take the coordinate name and length from the
            # original data, as this is what we will be keeping.
            try:
                sample_coord = points.coords(coord.name())[0]
                coord_names_and_sizes_for_sample_grid.append([coord.name(), len(sample_coord.points)])
                # Find the index of the sample coordinate corresponding to the data coordinate.
                sample_coord_transpose_map.append(sample_coord_lookup[sample_coord])
            except IndexError:
                coord_names_and_sizes_for_output_grid.append([coord.name(), len(coord.points)])
                repeat_size *= len(coord.points)
                other_coord_transpose_map.append(idx)

        # Now we sort the sample coordinates so that they are in the same order as in the sample file,
        # rather than the order of the data file (that's the order we want the output dimensions).
        coord_names_and_sizes_for_sample_grid = [x[0] for x in sorted(zip(coord_names_and_sizes_for_sample_grid,
                                                                          sample_coord_transpose_map),
                                                                      key=lambda t: t[1])]

        # Adding the lists together in this way ensures that the coordinates not in the sample grid appear in the
        # final positions, which is important for adding the points from the Iris interpolator to the new array.
        # The data returned from the Iris interpolator will have dimensions of these missing coordinates, which
        # need to be the final dimensions in the numpy array, as the iterator gives the position of the other
        # dimensions.
        coord_names_and_sizes_for_output_grid = coord_names_and_sizes_for_sample_grid + \
                                                coord_names_and_sizes_for_output_grid

        # An array for the collocated data, with the correct shape
        output_shape = tuple(i[1] for i in coord_names_and_sizes_for_output_grid)
        new_data = np.zeros(output_shape)

        if self.missing_data_for_missing_sample:
            output_mask = self._make_output_mask(coord_names_and_sizes_for_sample_grid, output_shape,
                                                 points, repeat_size)

        # Now recreate the points cube, while ignoring any DimCoords in points that are not in the data cube
        new_dim_coord_list = []
        new_points_array_shape = []
        for i in range(0, len(coord_names_and_sizes_for_output_grid)):
            # Try and find the coordinate in the sample grid
            coord_found = points.coords(coord_names_and_sizes_for_output_grid[i][0])

            # If the coordinate exists in the sample grid then append the new coordinate to the list. Iris requires
            # this be given as a DimCoord object, along with an axis number, in a tuple pair.
            if len(coord_found) != 0:
                new_dim_coord_list.append((coord_found[0], len(new_dim_coord_list)))
                new_points_array_shape.append(coord_found[0].points.size)

        new_points_array = np.zeros(tuple(new_points_array_shape))

        # Use the new_points_array to recreate points, without the DimCoords not in the data cube
        points = iris.cube.Cube(new_points_array, dim_coords_and_dims=new_dim_coord_list)

        output_cube = self._iris_interpolate(coord_names_and_sizes_for_output_grid,
                                             coord_names_and_sizes_for_sample_grid, data,
                                             kernel, output_mask, points, self.extrapolate)

        if not isinstance(output_cube, list):
            return GriddedDataList([output_cube])
        else:
            return output_cube
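
# The reordering of coord_names_and_sizes_for_sample_grid above is a compact
# sort-one-list-by-another idiom; a standalone sketch:
names = [['lat', 5], ['lon', 3]]  # order as found in the data cube
positions = [1, 0]                # each coordinate's position in the sample
reordered = [x[0] for x in sorted(zip(names, positions), key=lambda t: t[1])]
assert reordered == [['lon', 3], ['lat', 5]]
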