Ejemplo n.º 1
0
    def test_concat(self):
        data = create_test_data()

        split_data = [
            data.indexed(dim1=slice(10)),
            data.indexed(dim1=slice(10, None))
        ]
        self.assertDatasetIdentical(data, Dataset.concat(split_data, 'dim1'))

        def rectify_dim_order(dataset):
            # return a new dataset with all variable dimensions tranposed into
            # the order in which they are found in `data`
            return Dataset(
                {
                    k: v.transpose(*data[k].dimensions)
                    for k, v in iteritems(dataset.variables)
                }, dataset.attrs)

        for dim in ['dim1', 'dim2', 'dim3']:
            datasets = [g for _, g in data.groupby(dim, squeeze=False)]
            self.assertDatasetIdentical(data, Dataset.concat(datasets, dim))
            self.assertDatasetIdentical(data,
                                        Dataset.concat(datasets, data[dim]))
            self.assertDatasetIdentical(
                data, Dataset.concat(datasets, data[dim], mode='minimal'))

            datasets = [g for _, g in data.groupby(dim, squeeze=True)]
            concat_over = [
                k for k, v in iteritems(data.variables)
                if dim in v.dimensions and k != dim
            ]
            actual = Dataset.concat(datasets,
                                    data[dim],
                                    concat_over=concat_over)
            self.assertDatasetIdentical(data, rectify_dim_order(actual))

            actual = Dataset.concat(datasets, data[dim], mode='different')
            self.assertDatasetIdentical(data, rectify_dim_order(actual))

        # Now add a new variable that doesn't depend on any of the current
        # dims and make sure the mode argument behaves as expected
        data['var4'] = ('dim4', np.arange(data.dimensions['dim3']))
        for dim in ['dim1', 'dim2', 'dim3']:
            datasets = [g for _, g in data.groupby(dim, squeeze=False)]
            actual = Dataset.concat(datasets, data[dim], mode='all')
            expected = np.array(
                [data['var4'].values for _ in range(data.dimensions[dim])])
            self.assertArrayEqual(actual['var4'].values, expected)

            actual = Dataset.concat(datasets, data[dim], mode='different')
            self.assertDataArrayEqual(data['var4'], actual['var4'])
            actual = Dataset.concat(datasets, data[dim], mode='minimal')
            self.assertDataArrayEqual(data['var4'], actual['var4'])

        # verify that the dimension argument takes precedence over
        # concatenating dataset variables of the same name
        dimension = (2 * data['dim1']).rename('dim1')
        datasets = [g for _, g in data.groupby('dim1', squeeze=False)]
        expected = data.copy()
        expected['dim1'] = dimension
        self.assertDatasetIdentical(expected,
                                    Dataset.concat(datasets, dimension))

        # TODO: factor this into several distinct tests
        data = create_test_data()
        split_data = [
            data.indexed(dim1=slice(10)),
            data.indexed(dim1=slice(10, None))
        ]

        with self.assertRaisesRegexp(ValueError, 'must supply at least one'):
            Dataset.concat([], 'dim1')

        with self.assertRaisesRegexp(ValueError, 'not all elements in'):
            Dataset.concat(split_data, 'dim1', concat_over=['not_found'])

        with self.assertRaisesRegexp(ValueError, 'global attributes not'):
            data0, data1 = deepcopy(split_data)
            data1.attrs['foo'] = 'bar'
            Dataset.concat([data0, data1], 'dim1', compat='identical')
        self.assertDatasetIdentical(
            data, Dataset.concat([data0, data1], 'dim1', compat='equals'))

        with self.assertRaisesRegexp(ValueError, 'encountered unexpected'):
            data0, data1 = deepcopy(split_data)
            data1['foo'] = ('bar', np.random.randn(10))
            Dataset.concat([data0, data1], 'dim1')

        with self.assertRaisesRegexp(ValueError, 'not equal across datasets'):
            data0, data1 = deepcopy(split_data)
            data1['dim2'] = 2 * data1['dim2']
            Dataset.concat([data0, data1], 'dim1')
Ejemplo n.º 2
0
    def test_concat(self):
        data = create_test_data()

        split_data = [data.indexed(dim1=slice(10)),
                      data.indexed(dim1=slice(10, None))]
        self.assertDatasetIdentical(data, Dataset.concat(split_data, 'dim1'))

        def rectify_dim_order(dataset):
            # return a new dataset with all variable dimensions tranposed into
            # the order in which they are found in `data`
            return Dataset({k: v.transpose(*data[k].dimensions)
                           for k, v in dataset.variables.iteritems()},
                           dataset.attrs)

        for dim in ['dim1', 'dim2', 'dim3']:
            datasets = [g for _, g in data.groupby(dim, squeeze=False)]
            self.assertDatasetIdentical(data, Dataset.concat(datasets, dim))
            self.assertDatasetIdentical(
                data, Dataset.concat(datasets, data[dim]))
            self.assertDatasetIdentical(
                data, Dataset.concat(datasets, data[dim], mode='minimal'))

            datasets = [g for _, g in data.groupby(dim, squeeze=True)]
            concat_over = [k for k, v in data.variables.iteritems()
                           if dim in v.dimensions and k != dim]
            actual = Dataset.concat(datasets, data[dim],
                                    concat_over=concat_over)
            self.assertDatasetIdentical(data, rectify_dim_order(actual))

            actual = Dataset.concat(datasets, data[dim], mode='different')
            self.assertDatasetIdentical(data, rectify_dim_order(actual))

        # Now add a new variable that doesn't depend on any of the current
        # dims and make sure the mode argument behaves as expected
        data['var4'] = ('dim4', np.arange(data.dimensions['dim3']))
        for dim in ['dim1', 'dim2', 'dim3']:
            datasets = [g for _, g in data.groupby(dim, squeeze=False)]
            actual = Dataset.concat(datasets, data[dim], mode='all')
            expected = np.array([data['var4'].values
                                 for _ in range(data.dimensions[dim])])
            self.assertArrayEqual(actual['var4'].values, expected)

            actual = Dataset.concat(datasets, data[dim], mode='different')
            self.assertDataArrayEqual(data['var4'], actual['var4'])
            actual = Dataset.concat(datasets, data[dim], mode='minimal')
            self.assertDataArrayEqual(data['var4'], actual['var4'])

        # verify that the dimension argument takes precedence over
        # concatenating dataset variables of the same name
        dimension = (2 * data['dim1']).rename('dim1')
        datasets = [g for _, g in data.groupby('dim1', squeeze=False)]
        expected = data.copy()
        expected['dim1'] = dimension
        self.assertDatasetIdentical(
            expected, Dataset.concat(datasets, dimension))

        # TODO: factor this into several distinct tests
        data = create_test_data()
        split_data = [data.indexed(dim1=slice(10)),
                      data.indexed(dim1=slice(10, None))]

        with self.assertRaisesRegexp(ValueError, 'must supply at least one'):
            Dataset.concat([], 'dim1')

        with self.assertRaisesRegexp(ValueError, 'not all elements in'):
            Dataset.concat(split_data, 'dim1', concat_over=['not_found'])

        with self.assertRaisesRegexp(ValueError, 'global attributes not'):
            data0, data1 = deepcopy(split_data)
            data1.attrs['foo'] = 'bar'
            Dataset.concat([data0, data1], 'dim1', compat='identical')
        self.assertDatasetIdentical(
            data, Dataset.concat([data0, data1], 'dim1', compat='equals'))

        with self.assertRaisesRegexp(ValueError, 'encountered unexpected'):
            data0, data1 = deepcopy(split_data)
            data1['foo'] = ('bar', np.random.randn(10))
            Dataset.concat([data0, data1], 'dim1')

        with self.assertRaisesRegexp(ValueError, 'not equal across datasets'):
            data0, data1 = deepcopy(split_data)
            data1['dim2'] = 2 * data1['dim2']
            Dataset.concat([data0, data1], 'dim1')
Ejemplo n.º 3
0
    def test_concat(self):
        data = create_test_data()

        split_data = [data.indexed_by(dim1=slice(10)),
                      data.indexed_by(dim1=slice(10, None))]
        self.assertDatasetEqual(data, Dataset.concat(split_data, 'dim1'))

        def rectify_dim_order(dataset):
            # return a new dataset with all variable dimensions tranposed into
            # the order in which they are found in `data`
            return Dataset({k: v.transpose(*data[k].dimensions)
                           for k, v in dataset.variables.iteritems()},
                           dataset.attributes)

        for dim in ['dim1', 'dim2', 'dim3']:
            datasets = [ds for _, ds in data.groupby(dim, squeeze=False)]
            self.assertDatasetEqual(data, Dataset.concat(datasets, dim))
            self.assertDatasetEqual(data, Dataset.concat(datasets, data[dim]))
            self.assertDatasetEqual(data, Dataset.concat(datasets, data[dim].variable))

            datasets = [ds for _, ds in data.groupby(dim, squeeze=True)]
            concat_over = [k for k, v in data.variables.iteritems()
                           if dim in v.dimensions and k != dim]
            actual = Dataset.concat(datasets, data[dim], concat_over=concat_over)
            self.assertDatasetEqual(data, rectify_dim_order(actual))

        # verify that the dimension argument takes precedence over
        # concatenating dataset variables of the same name
        dimension = (2 * data['dim1']).rename('dim1')
        datasets = [ds for _, ds in data.groupby('dim1', squeeze=False)]
        expected = data.copy()
        expected['dim1'] = dimension
        self.assertDatasetEqual(expected, Dataset.concat(datasets, dimension))

        with self.assertRaisesRegexp(ValueError, 'cannot be empty'):
            Dataset.concat([], 'dim1')
        with self.assertRaisesRegexp(ValueError, 'not all elements in'):
            Dataset.concat(split_data, 'dim1', concat_over=['not_found'])
        with self.assertRaisesRegexp(ValueError, 'global attributes not'):
            data0, data1 = deepcopy(split_data)
            data1.attributes['foo'] = 'bar'
            Dataset.concat([data0, data1], 'dim1')
        with self.assertRaisesRegexp(ValueError, 'encountered unexpected'):
            data0, data1 = deepcopy(split_data)
            data1['foo'] = ('bar', np.random.randn(10))
            Dataset.concat([data0, data1], 'dim1')
        with self.assertRaisesRegexp(ValueError, 'not equal across datasets'):
            data0, data1 = deepcopy(split_data)
            data1['dim2'] = 2 * data1['dim2']
            Dataset.concat([data0, data1], 'dim1')