Ejemplo n.º 1
0
    def test_groupby(self):
        data = Dataset({'x': ('x', list('abc')),
                        'c': ('x', [0, 1, 0]),
                        'z': (['x', 'y'], np.random.randn(3, 5))})
        groupby = data.groupby('x')
        self.assertEqual(len(groupby), 3)
        expected_groups = {'a': 0, 'b': 1, 'c': 2}
        self.assertEqual(groupby.groups, expected_groups)
        expected_items = [('a', data.isel(x=0)),
                          ('b', data.isel(x=1)),
                          ('c', data.isel(x=2))]
        self.assertEqual(list(groupby), expected_items)

        identity = lambda x: x
        for k in ['x', 'c', 'y']:
            actual = data.groupby(k, squeeze=False).apply(identity)
            self.assertEqual(data, actual)
Ejemplo n.º 2
0
    def test_groupby(self):
        data = Dataset({'x': ('x', list('abc')),
                        'c': ('x', [0, 1, 0]),
                        'z': (['x', 'y'], np.random.randn(3, 5))})
        groupby = data.groupby('x')
        self.assertEqual(len(groupby), 3)
        expected_groups = {'a': 0, 'b': 1, 'c': 2}
        self.assertEqual(groupby.groups, expected_groups)
        expected_items = [('a', data.indexed(x=0)),
                          ('b', data.indexed(x=1)),
                          ('c', data.indexed(x=2))]
        self.assertEqual(list(groupby), expected_items)

        identity = lambda x: x
        for k in ['x', 'c', 'y']:
            actual = data.groupby(k, squeeze=False).apply(identity)
            self.assertEqual(data, actual)

        data = create_test_data()
        for n, (t, sub) in enumerate(list(data.groupby('dim1'))[:3]):
            self.assertEqual(data['dim1'][n], t)
            self.assertVariableEqual(data['var1'][n], sub['var1'])
            self.assertVariableEqual(data['var2'][n], sub['var2'])
            self.assertVariableEqual(data['var3'][:, n], sub['var3'])

        # TODO: test the other edge cases
        with self.assertRaisesRegexp(ValueError, 'must be 1 dimensional'):
            data.groupby('var1')
        with self.assertRaisesRegexp(ValueError, 'length does not match'):
            data.groupby(data['dim1'][:3])
Ejemplo n.º 3
0
    def test_groupby(self):
        data = Dataset({
            'x': ('x', list('abc')),
            'c': ('x', [0, 1, 0]),
            'z': (['x', 'y'], np.random.randn(3, 5))
        })
        groupby = data.groupby('x')
        self.assertEqual(len(groupby), 3)
        expected_groups = {'a': 0, 'b': 1, 'c': 2}
        self.assertEqual(groupby.groups, expected_groups)
        expected_items = [('a', data.indexed(x=0)), ('b', data.indexed(x=1)),
                          ('c', data.indexed(x=2))]
        self.assertEqual(list(groupby), expected_items)

        identity = lambda x: x
        for k in ['x', 'c', 'y']:
            actual = data.groupby(k, squeeze=False).apply(identity)
            self.assertEqual(data, actual)

        data = create_test_data()
        for n, (t, sub) in enumerate(list(data.groupby('dim1'))[:3]):
            self.assertEqual(data['dim1'][n], t)
            self.assertVariableEqual(data['var1'][n], sub['var1'])
            self.assertVariableEqual(data['var2'][n], sub['var2'])
            self.assertVariableEqual(data['var3'][:, n], sub['var3'])

        # TODO: test the other edge cases
        with self.assertRaisesRegexp(ValueError, 'must be 1 dimensional'):
            data.groupby('var1')
        with self.assertRaisesRegexp(ValueError, 'length does not match'):
            data.groupby(data['dim1'][:3])
Ejemplo n.º 4
0
    def test_groupby_reduce(self):
        data = Dataset({'xy': (['x', 'y'], np.random.randn(3, 4)),
                        'xonly': ('x', np.random.randn(3)),
                        'yonly': ('y', np.random.randn(4)),
                        'letters': ('y', ['a', 'a', 'b', 'b'])})

        expected = data.mean('y')
        actual = data.groupby('x').mean()
        self.assertDatasetAllClose(expected, actual)

        actual = data.groupby('x').mean('y')
        self.assertDatasetAllClose(expected, actual)

        expected = Dataset({'xy': data['xy'].groupby('letters').mean(),
                            'xonly': data['xonly'].mean(),
                            'yonly': data['yonly'].groupby('letters').mean()})
        actual = data.groupby('letters').mean()
        self.assertDatasetAllClose(expected, actual)
Ejemplo n.º 5
0
    def test_groupby_returns_new_type(self):
        data = Dataset({'z': (['x', 'y'], np.random.randn(3, 5))})

        actual = data.groupby('x').apply(lambda ds: ds['z'])
        expected = data['z']
        self.assertDataArrayIdentical(expected, actual)

        actual = data['z'].groupby('x').apply(lambda x: x.to_dataset())
        expected = data
        self.assertDatasetIdentical(expected, actual)
Ejemplo n.º 6
0
class TestDataArray(TestCase):
    def setUp(self):
        self.attrs = {'attr1': 'value1', 'attr2': 2929}
        self.x = np.random.random((10, 20))
        self.v = Variable(['x', 'y'], self.x)
        self.va = Variable(['x', 'y'], self.x, self.attrs)
        self.ds = Dataset({'foo': self.v})
        self.dv = self.ds['foo']

    def test_repr(self):
        v = Variable(['time', 'x'], [[1, 2, 3], [4, 5, 6]], {'foo': 'bar'})
        data_array = DataArray(v, {'other': np.int64(0)}, name='my_variable')
        expected = dedent("""\
        <xray.DataArray 'my_variable' (time: 2, x: 3)>
        array([[1, 2, 3],
               [4, 5, 6]])
        Coordinates:
            other    int64 0
          * time     (time) int64 0 1
          * x        (x) int64 0 1 2
        Attributes:
            foo: bar""")
        self.assertEqual(expected, repr(data_array))

    def test_properties(self):
        self.assertVariableEqual(self.dv.variable, self.v)
        self.assertArrayEqual(self.dv.values, self.v.values)
        for attr in ['dims', 'dtype', 'shape', 'size', 'ndim', 'attrs']:
            self.assertEqual(getattr(self.dv, attr), getattr(self.v, attr))
        self.assertEqual(len(self.dv), len(self.v))
        self.assertVariableEqual(self.dv, self.v)
        self.assertItemsEqual(list(self.dv.coords), list(self.ds.coords))
        for k, v in iteritems(self.dv.coords):
            self.assertArrayEqual(v, self.ds.coords[k])
        with self.assertRaises(AttributeError):
            self.dv.dataset
        self.assertIsInstance(self.ds['x'].to_index(), pd.Index)
        with self.assertRaisesRegexp(ValueError, 'must be 1-dimensional'):
            self.ds['foo'].to_index()
        with self.assertRaises(AttributeError):
            self.dv.variable = self.v

    def test_name(self):
        arr = self.dv
        self.assertEqual(arr.name, 'foo')

        copied = arr.copy()
        arr.name = 'bar'
        self.assertEqual(arr.name, 'bar')
        self.assertDataArrayEqual(copied, arr)

        actual = DataArray(Coordinate('x', [3]))
        actual.name = 'y'
        expected = DataArray(Coordinate('y', [3]))
        self.assertDataArrayIdentical(actual, expected)

    def test_dims(self):
        arr = self.dv
        self.assertEqual(arr.dims, ('x', 'y'))

        arr.dims = ('w', 'z')
        self.assertEqual(arr.dims, ('w', 'z'))

        x = Dataset({'x': ('x', np.arange(5))})['x']
        x.dims = ('y',)
        self.assertEqual(x.dims, ('y',))
        self.assertEqual(x.name, 'y')

    def test_encoding(self):
        expected = {'foo': 'bar'}
        self.dv.encoding['foo'] = 'bar'
        self.assertEquals(expected, self.dv.encoding)

        expected = {'baz': 0}
        self.dv.encoding = expected
        self.assertEquals(expected, self.dv.encoding)
        self.assertIsNot(expected, self.dv.encoding)

    def test_constructor(self):
        data = np.random.random((2, 3))

        actual = DataArray(data)
        expected = Dataset({None: (['dim_0', 'dim_1'], data)})[None]
        self.assertDataArrayIdentical(expected, actual)

        actual = DataArray(data, [['a', 'b'], [-1, -2, -3]])
        expected = Dataset({None: (['dim_0', 'dim_1'], data),
                            'dim_0': ('dim_0', ['a', 'b']),
                            'dim_1': ('dim_1', [-1, -2, -3])})[None]
        self.assertDataArrayIdentical(expected, actual)

        actual = DataArray(data, [pd.Index(['a', 'b'], name='x'),
                                  pd.Index([-1, -2, -3], name='y')])
        expected = Dataset({None: (['x', 'y'], data),
                            'x': ('x', ['a', 'b']),
                            'y': ('y', [-1, -2, -3])})[None]
        self.assertDataArrayIdentical(expected, actual)

        coords = [['a', 'b'], [-1, -2, -3]]
        actual = DataArray(data, coords, ['x', 'y'])
        self.assertDataArrayIdentical(expected, actual)

        coords = [pd.Index(['a', 'b'], name='A'),
                  pd.Index([-1, -2, -3], name='B')]
        actual = DataArray(data, coords, ['x', 'y'])
        self.assertDataArrayIdentical(expected, actual)

        coords = {'x': ['a', 'b'], 'y': [-1, -2, -3]}
        actual = DataArray(data, coords, ['x', 'y'])
        self.assertDataArrayIdentical(expected, actual)

        coords = [('x', ['a', 'b']), ('y', [-1, -2, -3])]
        actual = DataArray(data, coords)
        self.assertDataArrayIdentical(expected, actual)

        actual = DataArray(data, OrderedDict(coords))
        self.assertDataArrayIdentical(expected, actual)

        expected = Dataset({None: (['x', 'y'], data),
                            'x': ('x', ['a', 'b'])})[None]
        actual = DataArray(data, {'x': ['a', 'b']}, ['x', 'y'])
        self.assertDataArrayIdentical(expected, actual)

        actual = DataArray(data, dims=['x', 'y'])
        expected = Dataset({None: (['x', 'y'], data)})[None]
        self.assertDataArrayIdentical(expected, actual)

        actual = DataArray(data, dims=['x', 'y'], name='foo')
        expected = Dataset({'foo': (['x', 'y'], data)})['foo']
        self.assertDataArrayIdentical(expected, actual)

        actual = DataArray(data, name='foo')
        expected = Dataset({'foo': (['dim_0', 'dim_1'], data)})['foo']
        self.assertDataArrayIdentical(expected, actual)

        actual = DataArray(data, dims=['x', 'y'], attrs={'bar': 2})
        expected = Dataset({None: (['x', 'y'], data, {'bar': 2})})[None]
        self.assertDataArrayIdentical(expected, actual)

        actual = DataArray(data, dims=['x', 'y'], encoding={'bar': 2})
        expected = Dataset({None: (['x', 'y'], data, {}, {'bar': 2})})[None]
        self.assertDataArrayIdentical(expected, actual)

    def test_constructor_invalid(self):
        data = np.random.randn(3, 2)

        with self.assertRaisesRegexp(ValueError, 'coords is not dict-like'):
            DataArray(data, [[0, 1, 2]], ['x', 'y'])

        with self.assertRaisesRegexp(ValueError, 'not a subset of the .* dim'):
            DataArray(data, {'x': [0, 1, 2]}, ['a', 'b'])
        with self.assertRaisesRegexp(ValueError, 'not a subset of the .* dim'):
            DataArray(data, {'x': [0, 1, 2]})

        with self.assertRaisesRegexp(TypeError, 'is not a string'):
            DataArray(data, dims=['x', None])

    def test_constructor_from_self_described(self):
        data = [[-0.1, 21], [0, 2]]
        expected = DataArray(data,
                             coords={'x': ['a', 'b'], 'y': [-1, -2]},
                             dims=['x', 'y'], name='foobar',
                             attrs={'bar': 2}, encoding={'foo': 3})
        actual = DataArray(expected)
        self.assertDataArrayIdentical(expected, actual)

        actual = DataArray(expected.values, actual.coords)
        self.assertDataArrayEqual(expected, actual)

        frame = pd.DataFrame(data, index=pd.Index(['a', 'b'], name='x'),
                             columns=pd.Index([-1, -2], name='y'))
        actual = DataArray(frame)
        self.assertDataArrayEqual(expected, actual)

        series = pd.Series(data[0], index=pd.Index([-1, -2], name='y'))
        actual = DataArray(series)
        self.assertDataArrayEqual(expected[0].reset_coords('x', drop=True),
                                  actual)

        panel = pd.Panel({0: frame})
        actual = DataArray(panel)
        expected = DataArray([data], expected.coords, ['dim_0', 'x', 'y'])
        self.assertDataArrayIdentical(expected, actual)

        expected = DataArray(data,
                             coords={'x': ['a', 'b'], 'y': [-1, -2],
                                     'a': 0, 'z': ('x', [-0.5, 0.5])},
                             dims=['x', 'y'])
        actual = DataArray(expected)
        self.assertDataArrayIdentical(expected, actual)

        actual = DataArray(expected.values, expected.coords)
        self.assertDataArrayIdentical(expected, actual)

        expected = Dataset({'foo': ('foo', ['a', 'b'])})['foo']
        actual = DataArray(pd.Index(['a', 'b'], name='foo'))
        self.assertDataArrayIdentical(expected, actual)

        actual = DataArray(Coordinate('foo', ['a', 'b']))
        self.assertDataArrayIdentical(expected, actual)

        s = pd.Series(range(2), pd.MultiIndex.from_product([['a', 'b'], [0]]))
        with self.assertRaisesRegexp(NotImplementedError, 'MultiIndex'):
            DataArray(s)

    def test_constructor_from_0d(self):
        expected = Dataset({None: ([], 0)})[None]
        actual = DataArray(0)
        self.assertDataArrayIdentical(expected, actual)

    def test_equals_and_identical(self):
        orig = DataArray(np.arange(5.0), {'a': 42}, dims='x')

        expected = orig
        actual = orig.copy()
        self.assertTrue(expected.equals(actual))
        self.assertTrue(expected.identical(actual))

        actual = expected.rename('baz')
        self.assertTrue(expected.equals(actual))
        self.assertFalse(expected.identical(actual))

        actual = expected.rename({'x': 'xxx'})
        self.assertFalse(expected.equals(actual))
        self.assertFalse(expected.identical(actual))

        actual = expected.copy()
        actual.attrs['foo'] = 'bar'
        self.assertTrue(expected.equals(actual))
        self.assertFalse(expected.identical(actual))

        actual = expected.copy()
        actual['x'] = ('x', -np.arange(5))
        self.assertFalse(expected.equals(actual))
        self.assertFalse(expected.identical(actual))

        actual = expected.reset_coords(drop=True)
        self.assertFalse(expected.equals(actual))
        self.assertFalse(expected.identical(actual))

        actual = orig.copy()
        actual[0] = np.nan
        expected = actual.copy()
        self.assertTrue(expected.equals(actual))
        self.assertTrue(expected.identical(actual))

        actual[:] = np.nan
        self.assertFalse(expected.equals(actual))
        self.assertFalse(expected.identical(actual))

        actual = expected.copy()
        actual['a'] = 100000
        self.assertFalse(expected.equals(actual))
        self.assertFalse(expected.identical(actual))

    def test_broadcast_equals(self):
        a = DataArray([0, 0], {'y': 0}, dims='x')
        b = DataArray([0, 0], {'y': ('x', [0, 0])}, dims='x')
        self.assertTrue(a.broadcast_equals(b))
        self.assertTrue(b.broadcast_equals(a))
        self.assertFalse(a.equals(b))
        self.assertFalse(a.identical(b))

        c = DataArray([0], coords={'x': 0}, dims='y')
        self.assertFalse(a.broadcast_equals(c))
        self.assertFalse(c.broadcast_equals(a))

    def test_getitem(self):
        # strings pull out dataarrays
        self.assertDataArrayIdentical(self.dv, self.ds['foo'])
        x = self.dv['x']
        y = self.dv['y']
        self.assertDataArrayIdentical(self.ds['x'], x)
        self.assertDataArrayIdentical(self.ds['y'], y)

        I = ReturnItem()
        for i in [I[:], I[...], I[x.values], I[x.variable], I[x], I[x, y],
                  I[x.values > -1], I[x.variable > -1], I[x > -1],
                  I[x > -1, y > -1]]:
            self.assertVariableEqual(self.dv, self.dv[i])
        for i in [I[0], I[:, 0], I[:3, :2],
                  I[x.values[:3]], I[x.variable[:3]], I[x[:3]], I[x[:3], y[:4]],
                  I[x.values > 3], I[x.variable > 3], I[x > 3], I[x > 3, y > 3]]:
            self.assertVariableEqual(self.v[i], self.dv[i])

    def test_getitem_dict(self):
        actual = self.dv[{'x': slice(3), 'y': 0}]
        expected = self.dv.isel(x=slice(3), y=0)
        self.assertDataArrayIdentical(expected, actual)

    def test_getitem_coords(self):
        orig = DataArray([[10], [20]],
                         {'x': [1, 2], 'y': [3], 'z': 4,
                          'x2': ('x', ['a', 'b']),
                          'y2': ('y', ['c']),
                          'xy': (['y', 'x'], [['d', 'e']])},
                         dims=['x', 'y'])

        self.assertDataArrayIdentical(orig, orig[:])
        self.assertDataArrayIdentical(orig, orig[:, :])
        self.assertDataArrayIdentical(orig, orig[...])
        self.assertDataArrayIdentical(orig, orig[:2, :1])
        self.assertDataArrayIdentical(orig, orig[[0, 1], [0]])

        actual = orig[0, 0]
        expected = DataArray(
            10, {'x': 1, 'y': 3, 'z': 4, 'x2': 'a', 'y2': 'c', 'xy': 'd'})
        self.assertDataArrayIdentical(expected, actual)

        actual = orig[0, :]
        expected = DataArray(
            [10], {'x': 1, 'y': [3], 'z': 4, 'x2': 'a', 'y2': ('y', ['c']),
                   'xy': ('y', ['d'])},
            dims='y')
        self.assertDataArrayIdentical(expected, actual)

        actual = orig[:, 0]
        expected = DataArray(
            [10, 20], {'x': [1, 2], 'y': 3, 'z': 4, 'x2': ('x', ['a', 'b']),
                       'y2': 'c', 'xy': ('x', ['d', 'e'])},
            dims='x')
        self.assertDataArrayIdentical(expected, actual)

    def test_isel(self):
        self.assertDataArrayIdentical(self.dv[0], self.dv.isel(x=0))
        self.assertDataArrayIdentical(self.dv, self.dv.isel(x=slice(None)))
        self.assertDataArrayIdentical(self.dv[:3], self.dv.isel(x=slice(3)))
        self.assertDataArrayIdentical(self.dv[:3, :5],
                                      self.dv.isel(x=slice(3), y=slice(5)))

    def test_sel(self):
        self.ds['x'] = ('x', np.array(list('abcdefghij')))
        da = self.ds['foo']
        self.assertDataArrayIdentical(da, da.sel(x=slice(None)))
        self.assertDataArrayIdentical(da[1], da.sel(x='b'))
        self.assertDataArrayIdentical(da[:3], da.sel(x=slice('c')))
        self.assertDataArrayIdentical(da[:3], da.sel(x=['a', 'b', 'c']))
        self.assertDataArrayIdentical(da[:, :4], da.sel(y=(self.ds['y'] < 4)))
        # verify that indexing with a dataarray works
        b = DataArray('b')
        self.assertDataArrayIdentical(da[1], da.sel(x=b))
        self.assertDataArrayIdentical(da[[1]], da.sel(x=slice(b, b)))

    def test_loc(self):
        self.ds['x'] = ('x', np.array(list('abcdefghij')))
        da = self.ds['foo']
        self.assertDataArrayIdentical(da[:3], da.loc[:'c'])
        self.assertDataArrayIdentical(da[1], da.loc['b'])
        self.assertDataArrayIdentical(da[1], da.loc[{'x': 'b'}])
        self.assertDataArrayIdentical(da[1], da.loc['b', ...])
        self.assertDataArrayIdentical(da[:3], da.loc[['a', 'b', 'c']])
        self.assertDataArrayIdentical(da[:3, :4],
                                      da.loc[['a', 'b', 'c'], np.arange(4)])
        self.assertDataArrayIdentical(da[:, :4], da.loc[:, self.ds['y'] < 4])
        da.loc['a':'j'] = 0
        self.assertTrue(np.all(da.values == 0))
        da.loc[{'x': slice('a', 'j')}] = 2
        self.assertTrue(np.all(da.values == 2))

    def test_loc_single_boolean(self):
        data = DataArray([0, 1], coords=[[True, False]])
        self.assertEqual(data.loc[True], 0)
        self.assertEqual(data.loc[False], 1)

    def test_time_components(self):
        dates = pd.date_range('2000-01-01', periods=10)
        da = DataArray(np.arange(1, 11), [('time', dates)])

        self.assertArrayEqual(da['time.dayofyear'], da.values)
        self.assertArrayEqual(da.coords['time.dayofyear'], da.values)

    def test_coords(self):
        # use int64 to ensure repr() consistency on windows
        coords = [Coordinate('x', np.array([-1, -2], 'int64')),
                  Coordinate('y', np.array([0, 1, 2], 'int64'))]
        da = DataArray(np.random.randn(2, 3), coords, name='foo')

        self.assertEquals(2, len(da.coords))

        self.assertEqual(['x', 'y'], list(da.coords))

        self.assertTrue(coords[0].identical(da.coords['x']))
        self.assertTrue(coords[1].identical(da.coords['y']))

        self.assertIn('x', da.coords)
        self.assertNotIn(0, da.coords)
        self.assertNotIn('foo', da.coords)

        with self.assertRaises(KeyError):
            da.coords[0]
        with self.assertRaises(KeyError):
            da.coords['foo']

        expected = dedent("""\
        Coordinates:
          * x        (x) int64 -1 -2
          * y        (y) int64 0 1 2""")
        actual = repr(da.coords)
        self.assertEquals(expected, actual)

    def test_coord_coords(self):
        orig = DataArray([10, 20],
                         {'x': [1, 2], 'x2': ('x', ['a', 'b']), 'z': 4},
                         dims='x')

        actual = orig.coords['x']
        expected = DataArray([1, 2], {'z': 4, 'x2': ('x', ['a', 'b'])},
                             dims='x', name='x')
        self.assertDataArrayIdentical(expected, actual)

        del actual.coords['x2']
        self.assertDataArrayIdentical(
            expected.reset_coords('x2', drop=True), actual)

        actual.coords['x3'] = ('x', ['a', 'b'])
        expected = DataArray([1, 2], {'z': 4, 'x3': ('x', ['a', 'b'])},
                             dims='x', name='x')
        self.assertDataArrayIdentical(expected, actual)

    def test_reset_coords(self):
        data = DataArray(np.zeros((3, 4)),
                         {'bar': ('x', ['a', 'b', 'c']),
                          'baz': ('y', range(4))},
                         dims=['x', 'y'],
                         name='foo')

        actual = data.reset_coords()
        expected = Dataset({'foo': (['x', 'y'], np.zeros((3, 4))),
                            'bar': ('x', ['a', 'b', 'c']),
                            'baz': ('y', range(4))})
        self.assertDatasetIdentical(actual, expected)

        actual = data.reset_coords(['bar', 'baz'])
        self.assertDatasetIdentical(actual, expected)

        actual = data.reset_coords('bar')
        expected = Dataset({'foo': (['x', 'y'], np.zeros((3, 4))),
                            'bar': ('x', ['a', 'b', 'c'])},
                           {'baz': ('y', range(4))})
        self.assertDatasetIdentical(actual, expected)

        actual = data.reset_coords(['bar'])
        self.assertDatasetIdentical(actual, expected)

        actual = data.reset_coords(drop=True)
        expected = DataArray(np.zeros((3, 4)), dims=['x', 'y'], name='foo')
        self.assertDataArrayIdentical(actual, expected)

        actual = data.copy()
        actual.reset_coords(drop=True, inplace=True)
        self.assertDataArrayIdentical(actual, expected)

        actual = data.reset_coords('bar', drop=True)
        expected = DataArray(np.zeros((3, 4)), {'baz': ('y', range(4))},
                             dims=['x', 'y'], name='foo')
        self.assertDataArrayIdentical(actual, expected)

        with self.assertRaisesRegexp(ValueError, 'cannot reset coord'):
            data.reset_coords(inplace=True)
        with self.assertRaises(KeyError):
            data.reset_coords('foo', drop=True)
        with self.assertRaisesRegexp(ValueError, 'cannot be found'):
            data.reset_coords('not_found')
        with self.assertRaisesRegexp(ValueError, 'cannot remove index'):
            data.reset_coords('y')

    def test_reindex(self):
        foo = self.dv
        bar = self.dv[:2, :2]
        self.assertDataArrayIdentical(foo.reindex_like(bar), bar)

        expected = foo.copy()
        expected[:] = np.nan
        expected[:2, :2] = bar
        self.assertDataArrayIdentical(bar.reindex_like(foo), expected)

        # regression test for #279
        expected = DataArray(np.random.randn(5), dims=["time"])
        time2 = DataArray(np.arange(5), dims="time2")
        actual = expected.reindex(time=time2)
        self.assertDataArrayIdentical(actual, expected)

    def test_reindex_method(self):
        x = DataArray([10, 20], dims='y')
        y = [-0.5, 0.5, 1.5]
        actual = x.reindex(y=y, method='backfill')
        expected = DataArray([10, 20, np.nan], coords=[('y', y)])
        self.assertDataArrayIdentical(expected, actual)

        alt = Dataset({'y': y})
        actual = x.reindex_like(alt, method='backfill')
        self.assertDatasetIdentical(expected, actual)

    def test_rename(self):
        renamed = self.dv.rename('bar')
        self.assertDatasetIdentical(
            renamed.to_dataset(), self.ds.rename({'foo': 'bar'}))
        self.assertEqual(renamed.name, 'bar')

        renamed = self.dv.rename({'foo': 'bar'})
        self.assertDatasetIdentical(
            renamed.to_dataset(), self.ds.rename({'foo': 'bar'}))
        self.assertEqual(renamed.name, 'bar')

    def test_dataset_getitem(self):
        dv = self.ds['foo']
        self.assertDataArrayIdentical(dv, self.dv)

    def test_array_interface(self):
        self.assertArrayEqual(np.asarray(self.dv), self.x)
        # test patched in methods
        self.assertArrayEqual(self.dv.astype(float), self.v.astype(float))
        self.assertVariableEqual(self.dv.argsort(), self.v.argsort())
        self.assertVariableEqual(self.dv.clip(2, 3), self.v.clip(2, 3))
        # test ufuncs
        expected = deepcopy(self.ds)
        expected['foo'][:] = np.sin(self.x)
        self.assertDataArrayEqual(expected['foo'], np.sin(self.dv))
        self.assertDataArrayEqual(self.dv, np.maximum(self.v, self.dv))
        bar = Variable(['x', 'y'], np.zeros((10, 20)))
        self.assertDataArrayEqual(self.dv, np.maximum(self.dv, bar))

    def test_is_null(self):
        x = np.random.RandomState(42).randn(5, 6)
        x[x < 0] = np.nan
        original = DataArray(x, [-np.arange(5), np.arange(6)], ['x', 'y'])
        expected = DataArray(pd.isnull(x), [-np.arange(5), np.arange(6)],
                             ['x', 'y'])
        self.assertDataArrayIdentical(expected, original.isnull())
        self.assertDataArrayIdentical(~expected, original.notnull())

    def test_math(self):
        x = self.x
        v = self.v
        a = self.dv
        # variable math was already tested extensively, so let's just make sure
        # that all types are properly converted here
        self.assertDataArrayEqual(a, +a)
        self.assertDataArrayEqual(a, a + 0)
        self.assertDataArrayEqual(a, 0 + a)
        self.assertDataArrayEqual(a, a + 0 * v)
        self.assertDataArrayEqual(a, 0 * v + a)
        self.assertDataArrayEqual(a, a + 0 * x)
        self.assertDataArrayEqual(a, 0 * x + a)
        self.assertDataArrayEqual(a, a + 0 * a)
        self.assertDataArrayEqual(a, 0 * a + a)

    def test_math_automatic_alignment(self):
        a = DataArray(range(5), [('x', range(5))])
        b = DataArray(range(5), [('x', range(1, 6))])
        expected = DataArray(np.ones(4), [('x', [1, 2, 3, 4])])
        self.assertDataArrayIdentical(a - b, expected)

        with self.assertRaisesRegexp(ValueError, 'no overlapping labels'):
            a.isel(x=slice(2)) + a.isel(x=slice(2, None))

    def test_inplace_math_basics(self):
        x = self.x
        v = self.v
        a = self.dv
        b = a
        b += 1
        self.assertIs(b, a)
        self.assertIs(b.variable, v)
        self.assertArrayEqual(b.values, x)
        self.assertIs(source_ndarray(b.values), x)
        self.assertDatasetIdentical(b._dataset, self.ds)

    def test_inplace_math_automatic_alignment(self):
        a = DataArray(range(5), [('x', range(5))])
        b = DataArray(range(1, 6), [('x', range(1, 6))])
        with self.assertRaisesRegexp(ValueError, 'not aligned'):
            a += b
        with self.assertRaisesRegexp(ValueError, 'not aligned'):
            b += a

    def test_math_name(self):
        # Verify that name is preserved only when it can be done unambiguously.
        # The rule (copied from pandas.Series) is keep the current name only if
        # the other object has the same name or no name attribute and this
        # object isn't a coordinate; otherwise reset to None.
        a = self.dv
        self.assertEqual((+a).name, 'foo')
        self.assertEqual((a + 0).name, 'foo')
        self.assertIs((a + a.rename(None)).name, None)
        self.assertIs((a + a.rename('bar')).name, None)
        self.assertEqual((a + a).name, 'foo')
        self.assertIs((+a['x']).name, None)
        self.assertIs((a['x'] + 0).name, None)
        self.assertIs((a + a['x']).name, None)

    def test_math_with_coords(self):
        coords = {'x': [-1, -2], 'y': ['ab', 'cd', 'ef'],
                  'lat': (['x', 'y'], [[1, 2, 3], [-1, -2, -3]]),
                  'c': -999}
        orig = DataArray(np.random.randn(2, 3), coords, dims=['x', 'y'])

        actual = orig + 1
        expected = DataArray(orig.values + 1, orig.coords)
        self.assertDataArrayIdentical(expected, actual)

        actual = 1 + orig
        self.assertDataArrayIdentical(expected, actual)

        actual = orig + orig[0, 0]
        exp_coords = dict((k, v) for k, v in coords.items() if k != 'lat')
        expected = DataArray(orig.values + orig.values[0, 0],
                             exp_coords, dims=['x', 'y'])
        self.assertDataArrayIdentical(expected, actual)

        actual = orig[0, 0] + orig
        self.assertDataArrayIdentical(expected, actual)

        actual = orig[0, 0] + orig[-1, -1]
        expected = DataArray(orig.values[0, 0] + orig.values[-1, -1],
                             {'c': -999})
        self.assertDataArrayIdentical(expected, actual)

        actual = orig[:, 0] + orig[0, :]
        exp_values = orig[:, 0].values[:, None] + orig[0, :].values[None, :]
        expected = DataArray(exp_values, exp_coords, dims=['x', 'y'])
        self.assertDataArrayIdentical(expected, actual)

        actual = orig[0, :] + orig[:, 0]
        self.assertDataArrayIdentical(expected.T, actual)

        actual = orig - orig.T
        expected = DataArray(np.zeros((2, 3)), orig.coords)
        self.assertDataArrayIdentical(expected, actual)

        actual = orig.T - orig
        self.assertDataArrayIdentical(expected.T, actual)

        alt = DataArray([1, 1], {'x': [-1, -2], 'c': 'foo', 'd': 555}, 'x')
        actual = orig + alt
        expected = orig + 1
        expected.coords['d'] = 555
        del expected.coords['c']
        self.assertDataArrayIdentical(expected, actual)

        actual = alt + orig
        self.assertDataArrayIdentical(expected, actual)

    def test_index_math(self):
        orig = DataArray(range(3), dims='x', name='x')
        actual = orig + 1
        expected = DataArray(1 + np.arange(3), coords=[('x', range(3))])
        self.assertDataArrayIdentical(expected, actual)

        # regression tests for #254
        actual = orig[0] < orig
        expected = DataArray([False, True, True], coords=[('x', range(3))])
        self.assertDataArrayIdentical(expected, actual)

        actual = orig > orig[0]
        self.assertDataArrayIdentical(expected, actual)

    def test_dataset_math(self):
        # more comprehensive tests with multiple dataset variables
        obs = Dataset({'tmin': ('x', np.arange(5)),
                       'tmax': ('x', 10 + np.arange(5))},
                      {'x': ('x', 0.5 * np.arange(5)),
                       'loc': ('x', range(-2, 3))})

        actual = 2 * obs['tmax']
        expected = DataArray(2 * (10 + np.arange(5)), obs.coords, name='tmax')
        self.assertDataArrayIdentical(actual, expected)

        actual = obs['tmax'] - obs['tmin']
        expected = DataArray(10 * np.ones(5), obs.coords)
        self.assertDataArrayIdentical(actual, expected)

        sim = Dataset({'tmin': ('x', 1 + np.arange(5)),
                       'tmax': ('x', 11 + np.arange(5)),
                       # does *not* include 'loc' as a coordinate
                       'x': ('x', 0.5 * np.arange(5))})

        actual = sim['tmin'] - obs['tmin']
        expected = DataArray(np.ones(5), obs.coords, name='tmin')
        self.assertDataArrayIdentical(actual, expected)

        actual = -obs['tmin'] + sim['tmin']
        self.assertDataArrayIdentical(actual, expected)

        actual = sim['tmin'].copy()
        actual -= obs['tmin']
        self.assertDataArrayIdentical(actual, expected)

        actual = sim.copy()
        actual['tmin'] = sim['tmin'] - obs['tmin']
        expected = Dataset({'tmin': ('x', np.ones(5)),
                            'tmax': ('x', sim['tmax'].values)},
                            obs.coords)
        self.assertDatasetIdentical(actual, expected)

        actual = sim.copy()
        actual['tmin'] -= obs['tmin']
        self.assertDatasetIdentical(actual, expected)

    def test_transpose(self):
        self.assertVariableEqual(self.dv.variable.transpose(),
                                 self.dv.transpose())

    def test_squeeze(self):
        self.assertVariableEqual(self.dv.variable.squeeze(), self.dv.squeeze())

    def test_drop_coordinates(self):
        expected = DataArray(np.random.randn(2, 3), dims=['x', 'y'])
        arr = expected.copy()
        arr.coords['z'] = 2
        actual = arr.drop('z')
        self.assertDataArrayIdentical(expected, actual)

        with self.assertRaises(ValueError):
            arr.drop('not found')

        with self.assertRaisesRegexp(ValueError, 'cannot drop'):
            arr.drop(None)

        renamed = arr.rename('foo')
        with self.assertRaisesRegexp(ValueError, 'cannot drop'):
            renamed.drop('foo')

    def test_drop_index_labels(self):
        arr = DataArray(np.random.randn(2, 3), dims=['x', 'y'])
        actual = arr.drop([0, 1], dim='y')
        expected = arr[:, 2:]
        self.assertDataArrayIdentical(expected, actual)

    def test_dropna(self):
        x = np.random.randn(4, 4)
        x[::2, 0] = np.nan
        arr = DataArray(x, dims=['a', 'b'])

        actual = arr.dropna('a')
        expected = arr[1::2]
        self.assertDataArrayIdentical(actual, expected)

        actual = arr.dropna('b', how='all')
        self.assertDataArrayIdentical(actual, arr)

        actual = arr.dropna('a', thresh=1)
        self.assertDataArrayIdentical(actual, arr)

        actual = arr.dropna('b', thresh=3)
        expected = arr[:, 1:]
        self.assertDataArrayIdentical(actual, expected)

    def test_reduce(self):
        coords = {'x': [-1, -2], 'y': ['ab', 'cd', 'ef'],
                  'lat': (['x', 'y'], [[1, 2, 3], [-1, -2, -3]]),
                  'c': -999}
        orig = DataArray([[-1, 0, 1], [-3, 0, 3]], coords, dims=['x', 'y'])

        actual = orig.mean()
        expected = DataArray(0, {'c': -999})
        self.assertDataArrayIdentical(expected, actual)

        actual = orig.mean(['x', 'y'])
        self.assertDataArrayIdentical(expected, actual)

        actual = orig.mean('x')
        expected = DataArray([-2, 0, 2], {'y': coords['y'], 'c': -999}, 'y')
        self.assertDataArrayIdentical(expected, actual)

        actual = orig.mean(['x'])
        self.assertDataArrayIdentical(expected, actual)

        actual = orig.mean('y')
        expected = DataArray([0, 0], {'x': coords['x'], 'c': -999}, 'x')
        self.assertDataArrayIdentical(expected, actual)

        self.assertVariableEqual(self.dv.reduce(np.mean, 'x'),
                                 self.v.reduce(np.mean, 'x'))

        orig = DataArray([[1, 0, np.nan], [3, 0, 3]], coords, dims=['x', 'y'])
        actual = orig.count()
        expected = DataArray(5, {'c': -999})
        self.assertDataArrayIdentical(expected, actual)

    def test_reduce_keep_attrs(self):
        # Test dropped attrs
        vm = self.va.mean()
        self.assertEqual(len(vm.attrs), 0)
        self.assertEqual(vm.attrs, OrderedDict())

        # Test kept attrs
        vm = self.va.mean(keep_attrs=True)
        self.assertEqual(len(vm.attrs), len(self.attrs))
        self.assertEqual(vm.attrs, self.attrs)

    def test_groupby_iter(self):
        for ((act_x, act_dv), (exp_x, exp_ds)) in \
                zip(self.dv.groupby('y'), self.ds.groupby('y')):
            self.assertEqual(exp_x, act_x)
            self.assertDataArrayIdentical(exp_ds['foo'], act_dv)
        for ((_, exp_dv), act_dv) in zip(self.dv.groupby('x'), self.dv):
            self.assertDataArrayIdentical(exp_dv, act_dv)

    def make_groupby_example_array(self):
        da = self.dv.copy()
        da.coords['abc'] = ('y', np.array(['a'] * 9 + ['c'] + ['b'] * 10))
        da.coords['y'] = 20 + 100 * da['y']
        return da

    def test_groupby_properties(self):
        grouped = self.make_groupby_example_array().groupby('abc')
        expected_unique = Variable('abc', ['a', 'b', 'c'])
        self.assertVariableEqual(expected_unique, grouped.unique_coord)
        self.assertEqual(3, len(grouped))

    def test_groupby_apply_identity(self):
        expected = self.make_groupby_example_array()
        idx = expected.coords['y']
        identity = lambda x: x
        for g in ['x', 'y', 'abc', idx]:
            for shortcut in [False, True]:
                for squeeze in [False, True]:
                    grouped = expected.groupby(g, squeeze=squeeze)
                    actual = grouped.apply(identity, shortcut=shortcut)
                    self.assertDataArrayIdentical(expected, actual)

    def test_groupby_sum(self):
        array = self.make_groupby_example_array()
        grouped = array.groupby('abc')

        expected_sum_all = Dataset(
            {'foo': Variable(['abc'], np.array([self.x[:, :9].sum(),
                                                self.x[:, 10:].sum(),
                                                self.x[:, 9:10].sum()]).T),
             'abc': Variable(['abc'], np.array(['a', 'b', 'c']))})['foo']
        self.assertDataArrayAllClose(expected_sum_all, grouped.reduce(np.sum))
        self.assertDataArrayAllClose(expected_sum_all, grouped.sum())

        expected = DataArray([array['y'].values[idx].sum() for idx
                              in [slice(9), slice(10, None), slice(9, 10)]],
                             [['a', 'b', 'c']], ['abc'])
        actual = array['y'].groupby('abc').apply(np.sum)
        self.assertDataArrayAllClose(expected, actual)
        actual = array['y'].groupby('abc').sum()
        self.assertDataArrayAllClose(expected, actual)

        expected_sum_axis1 = Dataset(
            {'foo': (['x', 'abc'], np.array([self.x[:, :9].sum(1),
                                             self.x[:, 10:].sum(1),
                                             self.x[:, 9:10].sum(1)]).T),
             'x': self.ds['x'],
             'abc': Variable(['abc'], np.array(['a', 'b', 'c']))})['foo']
        self.assertDataArrayAllClose(expected_sum_axis1,
                                     grouped.reduce(np.sum, 'y'))
        self.assertDataArrayAllClose(expected_sum_axis1, grouped.sum('y'))

    def test_groupby_count(self):
        array = DataArray([0, 0, np.nan, np.nan, 0, 0],
                          coords={'cat': ('x', ['a', 'b', 'b', 'c', 'c', 'c'])},
                          dims='x')
        actual = array.groupby('cat').count()
        expected = DataArray([1, 1, 2], coords=[('cat', ['a', 'b', 'c'])])
        self.assertDataArrayIdentical(actual, expected)

    @unittest.skip('needs to be fixed for shortcut=False, keep_attrs=False')
    def test_groupby_reduce_attrs(self):
        array = self.make_groupby_example_array()
        array.attrs['foo'] = 'bar'

        for shortcut in [True, False]:
            for keep_attrs in [True, False]:
                print('shortcut=%s, keep_attrs=%s' % (shortcut, keep_attrs))
                actual = array.groupby('abc').reduce(
                    np.mean, keep_attrs=keep_attrs, shortcut=shortcut)
                expected = array.groupby('abc').mean()
                if keep_attrs:
                    expected.attrs['foo'] = 'bar'
                self.assertDataArrayIdentical(expected, actual)

    def test_groupby_apply_center(self):
        def center(x):
            return x - np.mean(x)

        array = self.make_groupby_example_array()
        grouped = array.groupby('abc')

        expected_ds = array.to_dataset()
        exp_data = np.hstack([center(self.x[:, :9]),
                              center(self.x[:, 9:10]),
                              center(self.x[:, 10:])])
        expected_ds['foo'] = (['x', 'y'], exp_data)
        expected_centered = expected_ds['foo']
        self.assertDataArrayAllClose(expected_centered, grouped.apply(center))

    def test_groupby_apply_ndarray(self):
        # regression test for #326
        array = self.make_groupby_example_array()
        grouped = array.groupby('abc')
        actual = grouped.apply(np.asarray)
        self.assertDataArrayEqual(array, actual)

    def test_groupby_apply_changes_metadata(self):
        def change_metadata(x):
            x.coords['x'] = x.coords['x'] * 2
            x.attrs['fruit'] = 'lemon'
            return x

        array = self.make_groupby_example_array()
        grouped = array.groupby('abc')
        actual = grouped.apply(change_metadata)
        expected = array.copy()
        expected = change_metadata(expected)
        self.assertDataArrayEqual(expected, actual)

    def test_groupby_math(self):
        array = self.make_groupby_example_array()
        for squeeze in [True, False]:
            grouped = array.groupby('x', squeeze=squeeze)

            expected = array + array.coords['x']
            actual = grouped + array.coords['x']
            self.assertDataArrayIdentical(expected, actual)

            actual = array.coords['x'] + grouped
            self.assertDataArrayIdentical(expected, actual)

            ds = array.coords['x'].to_dataset()
            expected = array + ds
            actual = grouped + ds
            self.assertDatasetIdentical(expected, actual)

            actual = ds + grouped
            self.assertDatasetIdentical(expected, actual)

        grouped = array.groupby('abc')
        expected_agg = (grouped.mean() - np.arange(3)).rename(None)
        actual = grouped - DataArray(range(3), [('abc', ['a', 'b', 'c'])])
        actual_agg = actual.groupby('abc').mean()
        self.assertDataArrayAllClose(expected_agg, actual_agg)

        with self.assertRaisesRegexp(TypeError, 'only support arithmetic'):
            grouped + 1
        with self.assertRaisesRegexp(TypeError, 'only support arithmetic'):
            grouped + grouped

    def test_groupby_restore_dim_order(self):
        array = DataArray(np.random.randn(5, 3),
                          coords={'a': ('x', range(5)), 'b': ('y', range(3))},
                          dims=['x', 'y'])
        for by, expected_dims in [('x', ('x', 'y')),
                                  ('y', ('x', 'y')),
                                  ('a', ('a', 'y')),
                                  ('b', ('x', 'b'))]:
            result = array.groupby(by).apply(lambda x: x.squeeze())
            self.assertEqual(result.dims, expected_dims)

    def test_groupby_first_and_last(self):
        array = DataArray([1, 2, 3, 4, 5], dims='x')
        by = DataArray(['a'] * 2 + ['b'] * 3, dims='x', name='ab')

        expected = DataArray([1, 3], [('ab', ['a', 'b'])])
        actual = array.groupby(by).first()
        self.assertDataArrayIdentical(expected, actual)

        expected = DataArray([2, 5], [('ab', ['a', 'b'])])
        actual = array.groupby(by).last()
        self.assertDataArrayIdentical(expected, actual)

        array = DataArray(np.random.randn(5, 3), dims=['x', 'y'])
        expected = DataArray(array[[0, 2]], {'ab': ['a', 'b']}, ['ab', 'y'])
        actual = array.groupby(by).first()
        self.assertDataArrayIdentical(expected, actual)

        actual = array.groupby('x').first()
        expected = array # should be a no-op
        self.assertDataArrayIdentical(expected, actual)

    def test_resample(self):
        times = pd.date_range('2000-01-01', freq='6H', periods=10)
        array = DataArray(np.arange(10), [('time', times)])

        actual = array.resample('6H', dim='time')
        self.assertDataArrayIdentical(array, actual)

        actual = array.resample('24H', dim='time')
        expected = DataArray(array.to_series().resample('24H'))
        self.assertDataArrayIdentical(expected, actual)

        actual = array.resample('24H', dim='time', how=np.mean)
        self.assertDataArrayIdentical(expected, actual)

        with self.assertRaisesRegexp(ValueError, 'index must be monotonic'):
            array[[2, 0, 1]].resample('1D', dim='time')

    def test_resample_first(self):
        times = pd.date_range('2000-01-01', freq='6H', periods=10)
        array = DataArray(np.arange(10), [('time', times)])

        actual = array.resample('1D', dim='time', how='first')
        expected = DataArray([0, 4, 8], [('time', times[::4])])
        self.assertDataArrayIdentical(expected, actual)

        # verify that labels don't use the first value
        actual = array.resample('24H', dim='time', how='first')
        expected = DataArray(array.to_series().resample('24H', how='first'))
        self.assertDataArrayIdentical(expected, actual)

        # missing values
        array = array.astype(float)
        array[:2] = np.nan
        actual = array.resample('1D', dim='time', how='first')
        expected = DataArray([2, 4, 8], [('time', times[::4])])
        self.assertDataArrayIdentical(expected, actual)

        actual = array.resample('1D', dim='time', how='first', skipna=False)
        expected = DataArray([np.nan, 4, 8], [('time', times[::4])])
        self.assertDataArrayIdentical(expected, actual)

    def test_resample_skipna(self):
        times = pd.date_range('2000-01-01', freq='6H', periods=10)
        array = DataArray(np.ones(10), [('time', times)])
        array[1] = np.nan

        actual = array.resample('1D', dim='time', skipna=False)
        expected = DataArray([np.nan, 1, 1], [('time', times[::4])])
        self.assertDataArrayIdentical(expected, actual)

    def test_resample_upsampling(self):
        times = pd.date_range('2000-01-01', freq='1D', periods=5)
        array = DataArray(np.arange(5), [('time', times)])

        expected_time = pd.date_range('2000-01-01', freq='12H', periods=9)
        expected = array.reindex(time=expected_time)
        for how in ['mean', 'median', 'sum', 'first', 'last', np.mean]:
            actual = array.resample('12H', 'time', how=how)
            self.assertDataArrayIdentical(expected, actual)

    def test_concat(self):
        self.ds['bar'] = Variable(['x', 'y'], np.random.randn(10, 20))
        foo = self.ds['foo']
        bar = self.ds['bar']
        # from dataset array:
        expected = DataArray(np.array([foo.values, bar.values]),
                             dims=['w', 'x', 'y'])
        actual = concat([foo, bar], 'w')
        self.assertDataArrayEqual(expected, actual)
        # from iteration:
        grouped = [g for _, g in foo.groupby('x')]
        stacked = concat(grouped, self.ds['x'])
        self.assertDataArrayIdentical(foo, stacked)
        # with an index as the 'dim' argument
        stacked = concat(grouped, self.ds.indexes['x'])
        self.assertDataArrayIdentical(foo, stacked)

        actual = concat([foo[0], foo[1]], pd.Index([0, 1])).reset_coords(drop=True)
        expected = foo[:2].rename({'x': 'concat_dim'})
        self.assertDataArrayIdentical(expected, actual)

        actual = concat([foo[0], foo[1]], [0, 1]).reset_coords(drop=True)
        expected = foo[:2].rename({'x': 'concat_dim'})
        self.assertDataArrayIdentical(expected, actual)

        with self.assertRaisesRegexp(ValueError, 'not identical'):
            concat([foo, bar], compat='identical')

    def test_align(self):
        self.ds['x'] = ('x', np.array(list('abcdefghij')))
        dv1, dv2 = align(self.dv, self.dv[:5], join='inner')
        self.assertDataArrayIdentical(dv1, self.dv[:5])
        self.assertDataArrayIdentical(dv2, self.dv[:5])

    def test_align_dtype(self):
        # regression test for #264
        x1 = np.arange(30)
        x2 = np.arange(5, 35)
        a = DataArray(np.random.random((30,)).astype('f32'), {'x': x1})
        b = DataArray(np.random.random((30,)).astype('f32'), {'x': x2})
        c, d = align(a, b, join='outer')
        self.assertEqual(c.dtype, np.float32)

    def test_broadcast_arrays(self):
        x = DataArray([1, 2], coords=[('a', [-1, -2])], name='x')
        y = DataArray([1, 2], coords=[('b', [3, 4])], name='y')
        x2, y2 = broadcast_arrays(x, y)
        expected_coords = [('a', [-1, -2]), ('b', [3, 4])]
        expected_x2 = DataArray([[1, 1], [2, 2]], expected_coords, name='x')
        expected_y2 = DataArray([[1, 2], [1, 2]], expected_coords, name='y')
        self.assertDataArrayIdentical(expected_x2, x2)
        self.assertDataArrayIdentical(expected_y2, y2)

        x = DataArray(np.random.randn(2, 3), dims=['a', 'b'])
        y = DataArray(np.random.randn(3, 2), dims=['b', 'a'])
        x2, y2 = broadcast_arrays(x, y)
        expected_x2 = x
        expected_y2 = y.T
        self.assertDataArrayIdentical(expected_x2, x2)
        self.assertDataArrayIdentical(expected_y2, y2)

        with self.assertRaisesRegexp(ValueError, 'cannot broadcast'):
            z = DataArray([1, 2], coords=[('a', [-10, 20])])
            broadcast_arrays(x, z)

    def test_to_pandas(self):
        # 0d
        actual = DataArray(42).to_pandas()
        expected = np.array(42)
        self.assertArrayEqual(actual, expected)

        # 1d
        values = np.random.randn(3)
        index = pd.Index(['a', 'b', 'c'], name='x')
        da = DataArray(values, coords=[index])
        actual = da.to_pandas()
        self.assertArrayEqual(actual.values, values)
        self.assertArrayEqual(actual.index, index)
        self.assertArrayEqual(actual.index.name, 'x')

        # 2d
        values = np.random.randn(3, 2)
        da = DataArray(values, coords=[('x', ['a', 'b', 'c']), ('y', [0, 1])],
                       name='foo')
        actual = da.to_pandas()
        self.assertArrayEqual(actual.values, values)
        self.assertArrayEqual(actual.index, ['a', 'b', 'c'])
        self.assertArrayEqual(actual.columns, [0, 1])

        # roundtrips
        for shape in [(3,), (3, 4), (3, 4, 5)]:
            dims = list('abc')[:len(shape)]
            da = DataArray(np.random.randn(*shape), dims=dims)
            roundtripped = DataArray(da.to_pandas())
            self.assertDataArrayIdentical(da, roundtripped)

        with self.assertRaisesRegexp(ValueError, 'cannot convert'):
            DataArray(np.random.randn(1, 2, 3, 4, 5)).to_pandas()

    def test_to_dataframe(self):
        # regression test for #260
        arr = DataArray(np.random.randn(3, 4),
                        [('B', [1, 2, 3]), ('A', list('cdef'))])
        expected = arr.to_series()
        actual = arr.to_dataframe()[None]
        self.assertArrayEqual(expected.values, actual.values)
        self.assertArrayEqual(expected.name, actual.name)
        self.assertArrayEqual(expected.index.values, actual.index.values)

        # regression test for coords with different dimensions
        arr.coords['C'] = ('B', [-1, -2, -3])
        expected = arr.to_series().to_frame()
        expected['C'] = [-1] * 4 + [-2] * 4 + [-3] * 4
        expected.columns = [None, 'C']
        actual = arr.to_dataframe()
        self.assertArrayEqual(expected.values, actual.values)
        self.assertArrayEqual(expected.columns.values, actual.columns.values)
        self.assertArrayEqual(expected.index.values, actual.index.values)

    def test_to_and_from_series(self):
        expected = self.dv.to_dataframe()['foo']
        actual = self.dv.to_series()
        self.assertArrayEqual(expected.values, actual.values)
        self.assertArrayEqual(expected.index.values, actual.index.values)
        self.assertEqual('foo', actual.name)
        # test roundtrip
        self.assertDataArrayIdentical(self.dv, DataArray.from_series(actual))
        # test name is None
        actual.name = None
        expected_da = self.dv.rename(None)
        self.assertDataArrayIdentical(expected_da,
                                      DataArray.from_series(actual))

    def test_to_and_from_cdms2(self):
        try:
            import cdms2
        except ImportError:
            raise unittest.SkipTest('cdms2 not installed')

        original = DataArray(np.arange(6).reshape(2, 3),
                             [('distance', [-2, 2], {'units': 'meters'}),
                              ('time', pd.date_range('2000-01-01', periods=3))],
                             name='foo', attrs={'baz': 123})
        expected_coords = [Coordinate('distance', [-2, 2]),
                           Coordinate('time', [0, 1, 2])]
        actual = original.to_cdms2()
        self.assertArrayEqual(actual, original)
        self.assertEqual(actual.id, original.name)
        self.assertItemsEqual(actual.getAxisIds(), original.dims)
        for axis, coord in zip(actual.getAxisList(), expected_coords):
            self.assertEqual(axis.id, coord.name)
            self.assertArrayEqual(axis, coord.values)
        self.assertEqual(actual.baz, original.attrs['baz'])

        component_times = actual.getAxis(1).asComponentTime()
        self.assertEqual(len(component_times), 3)
        self.assertEqual(str(component_times[0]), '2000-1-1 0:0:0.0')

        roundtripped = DataArray.from_cdms2(actual)
        self.assertDataArrayIdentical(original, roundtripped)

    def test_to_dataset(self):
        unnamed = DataArray([1, 2], dims='x')
        actual = unnamed.to_dataset()
        expected = Dataset({None: ('x', [1, 2])})
        self.assertDatasetIdentical(expected, actual)
        self.assertIsNot(unnamed._dataset, actual)

        actual = unnamed.to_dataset('foo')
        expected = Dataset({'foo': ('x', [1, 2])})
        self.assertDatasetIdentical(expected, actual)

        named = DataArray([1, 2], dims='x', name='foo')
        actual = named.to_dataset()
        expected = Dataset({'foo': ('x', [1, 2])})
        self.assertDatasetIdentical(expected, actual)

        actual = named.to_dataset('bar')
        expected = Dataset({'bar': ('x', [1, 2])})
        self.assertDatasetIdentical(expected, actual)
Ejemplo n.º 7
0
class TestDataArray(TestCase):
    def setUp(self):
        self.attrs = {'attr1': 'value1', 'attr2': 2929}
        self.x = np.random.random((10, 20))
        self.v = Variable(['x', 'y'], self.x)
        self.va = Variable(['x', 'y'], self.x, self.attrs)
        self.ds = Dataset({'foo': self.v})
        self.dv = self.ds['foo']

    def test_repr(self):
        v = Variable(['time', 'x'], [[1, 2, 3], [4, 5, 6]], {'foo': 'bar'})
        data_array = DataArray(v, {'other': ([], 0)}, name='my_variable')
        expected = dedent("""\
        <xray.DataArray 'my_variable' (time: 2, x: 3)>
        array([[1, 2, 3],
               [4, 5, 6]])
        Coordinates:
            other    int64 0
          * time     (time) int64 0 1
          * x        (x) int64 0 1 2
        Attributes:
            foo: bar""")
        self.assertEqual(expected, repr(data_array))

    def test_properties(self):
        self.assertVariableEqual(self.dv.variable, self.v)
        self.assertArrayEqual(self.dv.values, self.v.values)
        for attr in ['dims', 'dtype', 'shape', 'size', 'ndim', 'attrs']:
            self.assertEqual(getattr(self.dv, attr), getattr(self.v, attr))
        self.assertEqual(len(self.dv), len(self.v))
        self.assertVariableEqual(self.dv, self.v)
        self.assertItemsEqual(list(self.dv.coords), list(self.ds.coords))
        for k, v in iteritems(self.dv.coords):
            self.assertArrayEqual(v, self.ds.coords[k])
        with self.assertRaises(AttributeError):
            self.dv.dataset = self.ds
        self.assertIsInstance(self.ds['x'].to_index(), pd.Index)
        with self.assertRaisesRegexp(ValueError, 'must be 1-dimensional'):
            self.ds['foo'].to_index()
        with self.assertRaises(AttributeError):
            self.dv.variable = self.v

    def test_name(self):
        arr = self.dv
        self.assertEqual(arr.name, 'foo')

        copied = arr.copy()
        arr.name = 'bar'
        self.assertEqual(arr.name, 'bar')
        self.assertDataArrayEqual(copied, arr)

        actual = DataArray(Coordinate('x', [3]))
        actual.name = 'y'
        expected = DataArray(Coordinate('y', [3]))
        self.assertDataArrayIdentical(actual, expected)

    def test_dims(self):
        arr = self.dv
        self.assertEqual(arr.dims, ('x', 'y'))

        arr.dims = ('w', 'z')
        self.assertEqual(arr.dims, ('w', 'z'))

        x = Dataset({'x': ('x', np.arange(5))})['x']
        x.dims = ('y',)
        self.assertEqual(x.dims, ('y',))
        self.assertEqual(x.name, 'y')

    def test_encoding(self):
        expected = {'foo': 'bar'}
        self.dv.encoding['foo'] = 'bar'
        self.assertEquals(expected, self.dv.encoding)

        expected = {'baz': 0}
        self.dv.encoding = expected
        self.assertEquals(expected, self.dv.encoding)
        self.assertIsNot(expected, self.dv.encoding)

    def test_constructor(self):
        data = np.random.random((2, 3))

        actual = DataArray(data)
        expected = Dataset({None: (['dim_0', 'dim_1'], data)})[None]
        self.assertDataArrayIdentical(expected, actual)

        actual = DataArray(data, [['a', 'b'], [-1, -2, -3]])
        expected = Dataset({None: (['dim_0', 'dim_1'], data),
                            'dim_0': ('dim_0', ['a', 'b']),
                            'dim_1': ('dim_1', [-1, -2, -3])})[None]
        self.assertDataArrayIdentical(expected, actual)

        actual = DataArray(data, [pd.Index(['a', 'b'], name='x'),
                                  pd.Index([-1, -2, -3], name='y')])
        expected = Dataset({None: (['x', 'y'], data),
                            'x': ('x', ['a', 'b']),
                            'y': ('y', [-1, -2, -3])})[None]
        self.assertDataArrayIdentical(expected, actual)

        coords = [['a', 'b'], [-1, -2, -3]]
        actual = DataArray(data, coords, ['x', 'y'])
        self.assertDataArrayIdentical(expected, actual)

        coords = [pd.Index(['a', 'b'], name='A'),
                  pd.Index([-1, -2, -3], name='B')]
        actual = DataArray(data, coords, ['x', 'y'])
        self.assertDataArrayIdentical(expected, actual)

        coords = {'x': ['a', 'b'], 'y': [-1, -2, -3]}
        actual = DataArray(data, coords, ['x', 'y'])
        self.assertDataArrayIdentical(expected, actual)

        coords = [('x', ['a', 'b']), ('y', [-1, -2, -3])]
        actual = DataArray(data, coords)
        self.assertDataArrayIdentical(expected, actual)

        actual = DataArray(data, OrderedDict(coords))
        self.assertDataArrayIdentical(expected, actual)

        expected = Dataset({None: (['x', 'y'], data),
                            'x': ('x', ['a', 'b'])})[None]
        actual = DataArray(data, {'x': ['a', 'b']}, ['x', 'y'])
        self.assertDataArrayIdentical(expected, actual)

        actual = DataArray(data, dims=['x', 'y'])
        expected = Dataset({None: (['x', 'y'], data)})[None]
        self.assertDataArrayIdentical(expected, actual)

        actual = DataArray(data, dims=['x', 'y'], name='foo')
        expected = Dataset({'foo': (['x', 'y'], data)})['foo']
        self.assertDataArrayIdentical(expected, actual)

        actual = DataArray(data, name='foo')
        expected = Dataset({'foo': (['dim_0', 'dim_1'], data)})['foo']
        self.assertDataArrayIdentical(expected, actual)

        actual = DataArray(data, dims=['x', 'y'], attrs={'bar': 2})
        expected = Dataset({None: (['x', 'y'], data, {'bar': 2})})[None]
        self.assertDataArrayIdentical(expected, actual)

        actual = DataArray(data, dims=['x', 'y'], encoding={'bar': 2})
        expected = Dataset({None: (['x', 'y'], data, {}, {'bar': 2})})[None]
        self.assertDataArrayIdentical(expected, actual)

    def test_constructor_invalid(self):
        data = np.random.randn(3, 2)

        with self.assertRaisesRegexp(ValueError, 'coords is not dict-like'):
            DataArray(data, [[0, 1, 2]], ['x', 'y'])

        with self.assertRaisesRegexp(ValueError, 'not a subset of the .* dim'):
            DataArray(data, {'x': [0, 1, 2]}, ['a', 'b'])
        with self.assertRaisesRegexp(ValueError, 'not a subset of the .* dim'):
            DataArray(data, {'x': [0, 1, 2]})

        with self.assertRaisesRegexp(TypeError, 'is not a string'):
            DataArray(data, dims=['x', None])

    def test_constructor_from_self_described(self):
        data = [[-0.1, 21], [0, 2]]
        expected = DataArray(data,
                             coords={'x': ['a', 'b'], 'y': [-1, -2]},
                             dims=['x', 'y'], name='foobar',
                             attrs={'bar': 2}, encoding={'foo': 3})
        actual = DataArray(expected)
        self.assertDataArrayIdentical(expected, actual)

        actual = DataArray(expected.values, actual.coords)
        self.assertDataArrayEqual(expected, actual)

        frame = pd.DataFrame(data, index=pd.Index(['a', 'b'], name='x'),
                             columns=pd.Index([-1, -2], name='y'))
        actual = DataArray(frame)
        self.assertDataArrayEqual(expected, actual)

        series = pd.Series(data[0], index=pd.Index([-1, -2], name='y'))
        actual = DataArray(series)
        self.assertDataArrayEqual(expected[0].reset_coords('x', drop=True),
                                  actual)

        panel = pd.Panel({0: frame})
        actual = DataArray(panel)
        expected = DataArray([data], expected.coords, ['dim_0', 'x', 'y'])
        self.assertDataArrayIdentical(expected, actual)

        expected = DataArray(data,
                             coords={'x': ['a', 'b'], 'y': [-1, -2],
                                     'a': 0, 'z': ('x', [-0.5, 0.5])},
                             dims=['x', 'y'])
        actual = DataArray(expected)
        self.assertDataArrayIdentical(expected, actual)

        actual = DataArray(expected.values, expected.coords)
        self.assertDataArrayIdentical(expected, actual)

        expected = Dataset({'foo': ('foo', ['a', 'b'])})['foo']
        actual = DataArray(pd.Index(['a', 'b'], name='foo'))
        self.assertDataArrayIdentical(expected, actual)

        actual = DataArray(Coordinate('foo', ['a', 'b']))
        self.assertDataArrayIdentical(expected, actual)

        s = pd.Series(range(2), pd.MultiIndex.from_product([['a', 'b'], [0]]))
        with self.assertRaisesRegexp(NotImplementedError, 'MultiIndex'):
            DataArray(s)

    def test_constructor_from_0d(self):
        expected = Dataset({None: ([], 0)})[None]
        actual = DataArray(0)
        self.assertDataArrayIdentical(expected, actual)

    def test_equals_and_identical(self):
        orig = DataArray(np.arange(5.0), {'a': 42}, dims='x')

        expected = orig
        actual = orig.copy()
        self.assertTrue(expected.equals(actual))
        self.assertTrue(expected.identical(actual))

        actual = expected.rename('baz')
        self.assertTrue(expected.equals(actual))
        self.assertFalse(expected.identical(actual))

        actual = expected.rename({'x': 'xxx'})
        self.assertFalse(expected.equals(actual))
        self.assertFalse(expected.identical(actual))

        actual = expected.copy()
        actual.attrs['foo'] = 'bar'
        self.assertTrue(expected.equals(actual))
        self.assertFalse(expected.identical(actual))

        actual = expected.copy()
        actual['x'] = ('x', -np.arange(5))
        self.assertFalse(expected.equals(actual))
        self.assertFalse(expected.identical(actual))

        actual = expected.reset_coords(drop=True)
        self.assertFalse(expected.equals(actual))
        self.assertFalse(expected.identical(actual))

        actual = orig.copy()
        actual[0] = np.nan
        expected = actual.copy()
        self.assertTrue(expected.equals(actual))
        self.assertTrue(expected.identical(actual))

        actual[:] = np.nan
        self.assertFalse(expected.equals(actual))
        self.assertFalse(expected.identical(actual))

        actual = expected.copy()
        actual['a'] = 100000
        self.assertFalse(expected.equals(actual))
        self.assertFalse(expected.identical(actual))

    def test_getitem(self):
        # strings pull out dataarrays
        self.assertDataArrayIdentical(self.dv, self.ds['foo'])
        x = self.dv['x']
        y = self.dv['y']
        self.assertDataArrayIdentical(self.ds['x'], x)
        self.assertDataArrayIdentical(self.ds['y'], y)

        I = ReturnItem()
        for i in [I[:], I[...], I[x.values], I[x.variable], I[x], I[x, y],
                  I[x.values > -1], I[x.variable > -1], I[x > -1],
                  I[x > -1, y > -1]]:
            self.assertVariableEqual(self.dv, self.dv[i])
        for i in [I[0], I[:, 0], I[:3, :2],
                  I[x.values[:3]], I[x.variable[:3]], I[x[:3]], I[x[:3], y[:4]],
                  I[x.values > 3], I[x.variable > 3], I[x > 3], I[x > 3, y > 3]]:
            self.assertVariableEqual(self.v[i], self.dv[i])

    def test_getitem_dict(self):
        actual = self.dv[{'x': slice(3), 'y': 0}]
        expected = self.dv.isel(x=slice(3), y=0)
        self.assertDataArrayIdentical(expected, actual)

    def test_getitem_coords(self):
        orig = DataArray([[10], [20]],
                         {'x': [1, 2], 'y': [3], 'z': 4,
                          'x2': ('x', ['a', 'b']),
                          'y2': ('y', ['c']),
                          'xy': (['y', 'x'], [['d', 'e']])},
                         dims=['x', 'y'])

        self.assertDataArrayIdentical(orig, orig[:])
        self.assertDataArrayIdentical(orig, orig[:, :])
        self.assertDataArrayIdentical(orig, orig[...])
        self.assertDataArrayIdentical(orig, orig[:2, :1])
        self.assertDataArrayIdentical(orig, orig[[0, 1], [0]])

        actual = orig[0, 0]
        expected = DataArray(
            10, {'x': 1, 'y': 3, 'z': 4, 'x2': 'a', 'y2': 'c', 'xy': 'd'})
        self.assertDataArrayIdentical(expected, actual)

        actual = orig[0, :]
        expected = DataArray(
            [10], {'x': 1, 'y': [3], 'z': 4, 'x2': 'a', 'y2': ('y', ['c']),
                   'xy': ('y', ['d'])},
            dims='y')
        self.assertDataArrayIdentical(expected, actual)

        actual = orig[:, 0]
        expected = DataArray(
            [10, 20], {'x': [1, 2], 'y': 3, 'z': 4, 'x2': ('x', ['a', 'b']),
                       'y2': 'c', 'xy': ('x', ['d', 'e'])},
            dims='x')
        self.assertDataArrayIdentical(expected, actual)

    def test_isel(self):
        self.assertDataArrayIdentical(self.dv[0], self.dv.isel(x=0))
        self.assertDataArrayIdentical(self.dv, self.dv.isel(x=slice(None)))
        self.assertDataArrayIdentical(self.dv[:3], self.dv.isel(x=slice(3)))
        self.assertDataArrayIdentical(self.dv[:3, :5],
                                      self.dv.isel(x=slice(3), y=slice(5)))

    def test_sel(self):
        self.ds['x'] = ('x', np.array(list('abcdefghij')))
        da = self.ds['foo']
        self.assertDataArrayIdentical(da, da.sel(x=slice(None)))
        self.assertDataArrayIdentical(da[1], da.sel(x='b'))
        self.assertDataArrayIdentical(da[:3], da.sel(x=slice('c')))
        self.assertDataArrayIdentical(da[:3], da.sel(x=['a', 'b', 'c']))
        self.assertDataArrayIdentical(da[:, :4], da.sel(y=(self.ds['y'] < 4)))
        # verify that indexing with a dataarray works
        b = DataArray('b')
        self.assertDataArrayIdentical(da[1], da.sel(x=b))
        self.assertDataArrayIdentical(da[[1]], da.sel(x=slice(b, b)))

    def test_loc(self):
        self.ds['x'] = ('x', np.array(list('abcdefghij')))
        da = self.ds['foo']
        self.assertDataArrayIdentical(da[:3], da.loc[:'c'])
        self.assertDataArrayIdentical(da[1], da.loc['b'])
        self.assertDataArrayIdentical(da[1], da.loc[{'x': 'b'}])
        self.assertDataArrayIdentical(da[:3], da.loc[['a', 'b', 'c']])
        self.assertDataArrayIdentical(da[:3, :4],
                                      da.loc[['a', 'b', 'c'], np.arange(4)])
        self.assertDataArrayIdentical(da[:, :4], da.loc[:, self.ds['y'] < 4])
        da.loc['a':'j'] = 0
        self.assertTrue(np.all(da.values == 0))
        da.loc[{'x': slice('a', 'j')}] = 2
        self.assertTrue(np.all(da.values == 2))

    def test_loc_single_boolean(self):
        data = DataArray([0, 1], coords=[[True, False]])
        self.assertEqual(data.loc[True], 0)
        self.assertEqual(data.loc[False], 1)

    def test_time_components(self):
        dates = pd.date_range('2000-01-01', periods=10)
        da = DataArray(np.arange(1, 11), [('time', dates)])

        self.assertArrayEqual(da['time.dayofyear'], da.values)
        self.assertArrayEqual(da.coords['time.dayofyear'], da.values)

    def test_coords(self):
        coords = [Coordinate('x', [-1, -2]), Coordinate('y', [0, 1, 2])]
        da = DataArray(np.random.randn(2, 3), coords, name='foo')

        self.assertEquals(2, len(da.coords))

        self.assertEqual(['x', 'y'], list(da.coords))

        self.assertTrue(coords[0].identical(da.coords['x']))
        self.assertTrue(coords[1].identical(da.coords['y']))

        self.assertIn('x', da.coords)
        self.assertNotIn(0, da.coords)
        self.assertNotIn('foo', da.coords)

        with self.assertRaises(KeyError):
            da.coords[0]
        with self.assertRaises(KeyError):
            da.coords['foo']

        expected = dedent("""\
        Coordinates:
          * x        (x) int64 -1 -2
          * y        (y) int64 0 1 2""")
        actual = repr(da.coords)
        self.assertEquals(expected, actual)

    def test_coord_coords(self):
        orig = DataArray([10, 20],
                         {'x': [1, 2], 'x2': ('x', ['a', 'b']), 'z': 4},
                         dims='x')

        actual = orig.coords['x']
        expected = DataArray([1, 2], {'z': 4, 'x2': ('x', ['a', 'b'])},
                             dims='x', name='x')
        self.assertDataArrayIdentical(expected, actual)

        del actual.coords['x2']
        self.assertDataArrayIdentical(
            expected.reset_coords('x2', drop=True), actual)

        actual.coords['x3'] = ('x', ['a', 'b'])
        expected = DataArray([1, 2], {'z': 4, 'x3': ('x', ['a', 'b'])},
                             dims='x', name='x')
        self.assertDataArrayIdentical(expected, actual)

    def test_reset_coords(self):
        data = DataArray(np.zeros((3, 4)),
                         {'bar': ('x', ['a', 'b', 'c']),
                          'baz': ('y', range(4))},
                         dims=['x', 'y'],
                         name='foo')

        actual = data.reset_coords()
        expected = Dataset({'foo': (['x', 'y'], np.zeros((3, 4))),
                            'bar': ('x', ['a', 'b', 'c']),
                            'baz': ('y', range(4))})
        self.assertDatasetIdentical(actual, expected)

        actual = data.reset_coords(['bar', 'baz'])
        self.assertDatasetIdentical(actual, expected)

        actual = data.reset_coords('bar')
        expected = Dataset({'foo': (['x', 'y'], np.zeros((3, 4))),
                            'bar': ('x', ['a', 'b', 'c'])},
                           {'baz': ('y', range(4))})
        self.assertDatasetIdentical(actual, expected)

        actual = data.reset_coords(['bar'])
        self.assertDatasetIdentical(actual, expected)

        actual = data.reset_coords(drop=True)
        expected = DataArray(np.zeros((3, 4)), dims=['x', 'y'], name='foo')
        self.assertDataArrayIdentical(actual, expected)

        actual = data.copy()
        actual.reset_coords(drop=True, inplace=True)
        self.assertDataArrayIdentical(actual, expected)

        actual = data.reset_coords('bar', drop=True)
        expected = DataArray(np.zeros((3, 4)), {'baz': ('y', range(4))},
                             dims=['x', 'y'], name='foo')
        self.assertDataArrayIdentical(actual, expected)

        with self.assertRaisesRegexp(ValueError, 'cannot reset coord'):
            data.reset_coords(inplace=True)
        with self.assertRaises(KeyError):
            data.reset_coords('foo', drop=True)
        with self.assertRaisesRegexp(ValueError, 'cannot be found'):
            data.reset_coords('not_found')
        with self.assertRaisesRegexp(ValueError, 'cannot remove index'):
            data.reset_coords('y')

    def test_reindex(self):
        foo = self.dv
        bar = self.dv[:2, :2]
        self.assertDataArrayIdentical(foo.reindex_like(bar), bar)

        expected = foo.copy()
        expected[:] = np.nan
        expected[:2, :2] = bar
        self.assertDataArrayIdentical(bar.reindex_like(foo), expected)

        # regression test for #279
        expected = DataArray(np.random.randn(5), dims=["time"])
        time2 = DataArray(np.arange(5), dims="time2")
        actual = expected.reindex(time=time2)
        self.assertDataArrayIdentical(actual, expected)

    def test_rename(self):
        renamed = self.dv.rename('bar')
        self.assertDatasetIdentical(
            renamed.to_dataset(), self.ds.rename({'foo': 'bar'}))
        self.assertEqual(renamed.name, 'bar')

        renamed = self.dv.rename({'foo': 'bar'})
        self.assertDatasetIdentical(
            renamed.to_dataset(), self.ds.rename({'foo': 'bar'}))
        self.assertEqual(renamed.name, 'bar')

    def test_dataset_getitem(self):
        dv = self.ds['foo']
        self.assertDataArrayIdentical(dv, self.dv)

    def test_array_interface(self):
        self.assertArrayEqual(np.asarray(self.dv), self.x)
        # test patched in methods
        self.assertArrayEqual(self.dv.astype(float), self.v.astype(float))
        self.assertVariableEqual(self.dv.argsort(), self.v.argsort())
        self.assertVariableEqual(self.dv.clip(2, 3), self.v.clip(2, 3))
        # test ufuncs
        expected = deepcopy(self.ds)
        expected['foo'][:] = np.sin(self.x)
        self.assertDataArrayEqual(expected['foo'], np.sin(self.dv))
        self.assertDataArrayEqual(self.dv, np.maximum(self.v, self.dv))
        bar = Variable(['x', 'y'], np.zeros((10, 20)))
        self.assertDataArrayEqual(self.dv, np.maximum(self.dv, bar))

    def test_is_null(self):
        x = np.random.RandomState(42).randn(5, 6)
        x[x < 0] = np.nan
        original = DataArray(x, [-np.arange(5), np.arange(6)], ['x', 'y'])
        expected = DataArray(pd.isnull(x), [-np.arange(5), np.arange(6)],
                             ['x', 'y'])
        self.assertDataArrayIdentical(expected, original.isnull())
        self.assertDataArrayIdentical(~expected, original.notnull())

    def test_math(self):
        x = self.x
        v = self.v
        a = self.dv
        # variable math was already tested extensively, so let's just make sure
        # that all types are properly converted here
        self.assertDataArrayEqual(a, +a)
        self.assertDataArrayEqual(a, a + 0)
        self.assertDataArrayEqual(a, 0 + a)
        self.assertDataArrayEqual(a, a + 0 * v)
        self.assertDataArrayEqual(a, 0 * v + a)
        self.assertDataArrayEqual(a, a + 0 * x)
        self.assertDataArrayEqual(a, 0 * x + a)
        self.assertDataArrayEqual(a, a + 0 * a)
        self.assertDataArrayEqual(a, 0 * a + a)
        # test different indices
        b = a.copy()
        b.coords['x'] = 3 + np.arange(10)
        with self.assertRaisesRegexp(ValueError, 'not aligned'):
            a + b
        with self.assertRaisesRegexp(ValueError, 'not aligned'):
            b + a

    def test_inplace_math_basics(self):
        x = self.x
        v = self.v
        a = self.dv
        b = a
        b += 1
        self.assertIs(b, a)
        self.assertIs(b.variable, v)
        self.assertArrayEqual(b.values, x)
        self.assertIs(source_ndarray(b.values), x)
        self.assertDatasetIdentical(b._dataset, self.ds)

    def test_math_name(self):
        # Verify that name is preserved only when it can be done unambiguously.
        # The rule (copied from pandas.Series) is keep the current name only if
        # the other object has the same name or no name attribute and this
        # object isn't a coordinate; otherwise reset to None.
        a = self.dv
        self.assertEqual((+a).name, 'foo')
        self.assertEqual((a + 0).name, 'foo')
        self.assertIs((a + a.rename(None)).name, None)
        self.assertIs((a + a.rename('bar')).name, None)
        self.assertEqual((a + a).name, 'foo')
        self.assertIs((+a['x']).name, None)
        self.assertIs((a['x'] + 0).name, None)
        self.assertIs((a + a['x']).name, None)

    def test_math_with_coords(self):
        coords = {'x': [-1, -2], 'y': ['ab', 'cd', 'ef'],
                  'lat': (['x', 'y'], [[1, 2, 3], [-1, -2, -3]]),
                  'c': -999}
        orig = DataArray(np.random.randn(2, 3), coords, dims=['x', 'y'])

        actual = orig + 1
        expected = DataArray(orig.values + 1, orig.coords)
        self.assertDataArrayIdentical(expected, actual)

        actual = 1 + orig
        self.assertDataArrayIdentical(expected, actual)

        actual = orig + orig[0, 0]
        exp_coords = dict((k, v) for k, v in coords.items() if k != 'lat')
        expected = DataArray(orig.values + orig.values[0, 0],
                             exp_coords, dims=['x', 'y'])
        self.assertDataArrayIdentical(expected, actual)

        actual = orig[0, 0] + orig
        self.assertDataArrayIdentical(expected, actual)

        actual = orig[0, 0] + orig[-1, -1]
        expected = DataArray(orig.values[0, 0] + orig.values[-1, -1],
                             {'c': -999})
        self.assertDataArrayIdentical(expected, actual)

        actual = orig[:, 0] + orig[0, :]
        exp_values = orig[:, 0].values[:, None] + orig[0, :].values[None, :]
        expected = DataArray(exp_values, exp_coords, dims=['x', 'y'])
        self.assertDataArrayIdentical(expected, actual)

        actual = orig[0, :] + orig[:, 0]
        self.assertDataArrayIdentical(expected.T, actual)

        actual = orig - orig.T
        expected = DataArray(np.zeros((2, 3)), orig.coords)
        self.assertDataArrayIdentical(expected, actual)

        actual = orig.T - orig
        self.assertDataArrayIdentical(expected.T, actual)

        alt = DataArray([1, 1], {'x': [-1, -2], 'c': 'foo', 'd': 555}, 'x')
        actual = orig + alt
        expected = orig + 1
        expected.coords['d'] = 555
        del expected.coords['c']
        self.assertDataArrayIdentical(expected, actual)

        actual = alt + orig
        self.assertDataArrayIdentical(expected, actual)

    def test_index_math(self):
        orig = DataArray(range(3), dims='x', name='x')
        actual = orig + 1
        expected = DataArray(1 + np.arange(3), coords=[('x', range(3))])
        self.assertDataArrayIdentical(expected, actual)

        # regression tests for #254
        actual = orig[0] < orig
        expected = DataArray([False, True, True], coords=[('x', range(3))])
        self.assertDataArrayIdentical(expected, actual)

        actual = orig > orig[0]
        self.assertDataArrayIdentical(expected, actual)

    def test_dataset_math(self):
        # more comprehensive tests with multiple dataset variables
        obs = Dataset({'tmin': ('x', np.arange(5)),
                       'tmax': ('x', 10 + np.arange(5))},
                      {'x': ('x', 0.5 * np.arange(5)),
                       'loc': ('x', range(-2, 3))})

        actual = 2 * obs['tmax']
        expected = DataArray(2 * (10 + np.arange(5)), obs.coords, name='tmax')
        self.assertDataArrayIdentical(actual, expected)

        actual = obs['tmax'] - obs['tmin']
        expected = DataArray(10 * np.ones(5), obs.coords)
        self.assertDataArrayIdentical(actual, expected)

        sim = Dataset({'tmin': ('x', 1 + np.arange(5)),
                       'tmax': ('x', 11 + np.arange(5)),
                       # does *not* include 'loc' as a coordinate
                       'x': ('x', 0.5 * np.arange(5))})

        actual = sim['tmin'] - obs['tmin']
        expected = DataArray(np.ones(5), obs.coords, name='tmin')
        self.assertDataArrayIdentical(actual, expected)

        actual = -obs['tmin'] + sim['tmin']
        self.assertDataArrayIdentical(actual, expected)

        actual = sim['tmin'].copy()
        actual -= obs['tmin']
        self.assertDataArrayIdentical(actual, expected)

        actual = sim.copy()
        actual['tmin'] = sim['tmin'] - obs['tmin']
        expected = Dataset({'tmin': ('x', np.ones(5)),
                            'tmax': ('x', sim['tmax'].values)},
                            obs.coords)
        self.assertDatasetIdentical(actual, expected)

        actual = sim.copy()
        actual['tmin'] -= obs['tmin']
        self.assertDatasetIdentical(actual, expected)

    def test_transpose(self):
        self.assertVariableEqual(self.dv.variable.transpose(),
                                 self.dv.transpose())

    def test_squeeze(self):
        self.assertVariableEqual(self.dv.variable.squeeze(), self.dv.squeeze())

    def test_dropna(self):
        x = np.random.randn(4, 4)
        x[::2, 0] = np.nan
        arr = DataArray(x, dims=['a', 'b'])

        actual = arr.dropna('a')
        expected = arr[1::2]
        self.assertDataArrayIdentical(actual, expected)

        actual = arr.dropna('b', how='all')
        self.assertDataArrayIdentical(actual, arr)

        actual = arr.dropna('a', thresh=1)
        self.assertDataArrayIdentical(actual, arr)

        actual = arr.dropna('b', thresh=3)
        expected = arr[:, 1:]
        self.assertDataArrayIdentical(actual, expected)

    def test_reduce(self):
        coords = {'x': [-1, -2], 'y': ['ab', 'cd', 'ef'],
                  'lat': (['x', 'y'], [[1, 2, 3], [-1, -2, -3]]),
                  'c': -999}
        orig = DataArray([[-1, 0, 1], [-3, 0, 3]], coords, dims=['x', 'y'])

        actual = orig.mean()
        expected = DataArray(0, {'c': -999})
        self.assertDataArrayIdentical(expected, actual)

        actual = orig.mean(['x', 'y'])
        self.assertDataArrayIdentical(expected, actual)

        actual = orig.mean('x')
        expected = DataArray([-2, 0, 2], {'y': coords['y'], 'c': -999}, 'y')
        self.assertDataArrayIdentical(expected, actual)

        actual = orig.mean(['x'])
        self.assertDataArrayIdentical(expected, actual)

        actual = orig.mean('y')
        expected = DataArray([0, 0], {'x': coords['x'], 'c': -999}, 'x')
        self.assertDataArrayIdentical(expected, actual)

        self.assertVariableEqual(self.dv.reduce(np.mean, 'x'),
                                 self.v.reduce(np.mean, 'x'))

        orig = DataArray([[1, 0, np.nan], [3, 0, 3]], coords, dims=['x', 'y'])
        actual = orig.count()
        expected = DataArray(5, {'c': -999})
        self.assertDataArrayIdentical(expected, actual)

    def test_reduce_keep_attrs(self):
        # Test dropped attrs
        vm = self.va.mean()
        self.assertEqual(len(vm.attrs), 0)
        self.assertEqual(vm.attrs, OrderedDict())

        # Test kept attrs
        vm = self.va.mean(keep_attrs=True)
        self.assertEqual(len(vm.attrs), len(self.attrs))
        self.assertEqual(vm.attrs, self.attrs)

    def test_groupby_iter(self):
        for ((act_x, act_dv), (exp_x, exp_ds)) in \
                zip(self.dv.groupby('y'), self.ds.groupby('y')):
            self.assertEqual(exp_x, act_x)
            self.assertDataArrayIdentical(exp_ds['foo'], act_dv)
        for ((_, exp_dv), act_dv) in zip(self.dv.groupby('x'), self.dv):
            self.assertDataArrayIdentical(exp_dv, act_dv)

    def make_groupby_example_array(self):
        da = self.dv.copy()
        da.coords['abc'] = ('y', np.array(['a'] * 9 + ['c'] + ['b'] * 10))
        da.coords['y'] = 20 + 100 * da['y']
        return da

    def test_groupby_properties(self):
        grouped = self.make_groupby_example_array().groupby('abc')
        expected_unique = Variable('abc', ['a', 'b', 'c'])
        self.assertVariableEqual(expected_unique, grouped.unique_coord)
        self.assertEqual(3, len(grouped))

    def test_groupby_apply_identity(self):
        expected = self.make_groupby_example_array()
        idx = expected.coords['y']
        identity = lambda x: x
        for g in ['x', 'y', 'abc', idx]:
            for shortcut in [False, True]:
                for squeeze in [False, True]:
                    grouped = expected.groupby(g, squeeze=squeeze)
                    actual = grouped.apply(identity, shortcut=shortcut)
                    self.assertDataArrayIdentical(expected, actual)

    def test_groupby_sum(self):
        array = self.make_groupby_example_array()
        grouped = array.groupby('abc')

        expected_sum_all = Dataset(
            {'foo': Variable(['abc'], np.array([self.x[:, :9].sum(),
                                                self.x[:, 10:].sum(),
                                                self.x[:, 9:10].sum()]).T),
             'abc': Variable(['abc'], np.array(['a', 'b', 'c']))})['foo']
        self.assertDataArrayAllClose(expected_sum_all, grouped.reduce(np.sum))
        self.assertDataArrayAllClose(expected_sum_all, grouped.sum())

        expected = DataArray([array['y'].values[idx].sum() for idx
                              in [slice(9), slice(10, None), slice(9, 10)]],
                             [['a', 'b', 'c']], ['abc'])
        actual = array['y'].groupby('abc').apply(np.sum)
        self.assertDataArrayAllClose(expected, actual)
        actual = array['y'].groupby('abc').sum()
        self.assertDataArrayAllClose(expected, actual)

        expected_sum_axis1 = Dataset(
            {'foo': (['x', 'abc'], np.array([self.x[:, :9].sum(1),
                                             self.x[:, 10:].sum(1),
                                             self.x[:, 9:10].sum(1)]).T),
             'x': self.ds['x'],
             'abc': Variable(['abc'], np.array(['a', 'b', 'c']))})['foo']
        self.assertDataArrayAllClose(expected_sum_axis1,
                                     grouped.reduce(np.sum, 'y'))
        self.assertDataArrayAllClose(expected_sum_axis1, grouped.sum('y'))

    def test_groupby_count(self):
        array = DataArray([0, 0, np.nan, np.nan, 0, 0],
                          coords={'cat': ('x', ['a', 'b', 'b', 'c', 'c', 'c'])},
                          dims='x')
        actual = array.groupby('cat').count()
        expected = DataArray([1, 1, 2], coords=[('cat', ['a', 'b', 'c'])])
        self.assertDataArrayIdentical(actual, expected)

    @unittest.skip('needs to be fixed for shortcut=False, keep_attrs=False')
    def test_groupby_reduce_attrs(self):
        array = self.make_groupby_example_array()
        array.attrs['foo'] = 'bar'

        for shortcut in [True, False]:
            for keep_attrs in [True, False]:
                print('shortcut=%s, keep_attrs=%s' % (shortcut, keep_attrs))
                actual = array.groupby('abc').reduce(
                    np.mean, keep_attrs=keep_attrs, shortcut=shortcut)
                expected = array.groupby('abc').mean()
                if keep_attrs:
                    expected.attrs['foo'] = 'bar'
                self.assertDataArrayIdentical(expected, actual)

    def test_groupby_apply_center(self):
        def center(x):
            return x - np.mean(x)

        array = self.make_groupby_example_array()
        grouped = array.groupby('abc')

        expected_ds = array.to_dataset()
        exp_data = np.hstack([center(self.x[:, :9]),
                              center(self.x[:, 9:10]),
                              center(self.x[:, 10:])])
        expected_ds['foo'] = (['x', 'y'], exp_data)
        expected_centered = expected_ds['foo']
        self.assertDataArrayAllClose(expected_centered, grouped.apply(center))

    def test_groupby_math(self):
        array = self.make_groupby_example_array()
        for squeeze in [True, False]:
            grouped = array.groupby('x', squeeze=squeeze)

            expected = array + array.coords['x']
            actual = grouped + array.coords['x']
            self.assertDataArrayIdentical(expected, actual)

            actual = array.coords['x'] + grouped
            self.assertDataArrayIdentical(expected, actual)

            ds = array.coords['x'].to_dataset()
            expected = array + ds
            actual = grouped + ds
            self.assertDatasetIdentical(expected, actual)

            actual = ds + grouped
            self.assertDatasetIdentical(expected, actual)

        grouped = array.groupby('abc')
        expected_agg = (grouped.mean() - np.arange(3)).rename(None)
        actual = grouped - DataArray(range(3), [('abc', ['a', 'b', 'c'])])
        actual_agg = actual.groupby('abc').mean()
        self.assertDataArrayAllClose(expected_agg, actual_agg)

        with self.assertRaisesRegexp(TypeError, 'only support arithmetic'):
            grouped + 1
        with self.assertRaisesRegexp(TypeError, 'only support arithmetic'):
            grouped + grouped

    def test_concat(self):
        self.ds['bar'] = Variable(['x', 'y'], np.random.randn(10, 20))
        foo = self.ds['foo']
        bar = self.ds['bar']
        # from dataset array:
        expected = DataArray(np.array([foo.values, bar.values]),
                             dims=['w', 'x', 'y'])
        actual = concat([foo, bar], 'w')
        self.assertDataArrayEqual(expected, actual)
        # from iteration:
        grouped = [g for _, g in foo.groupby('x')]
        stacked = concat(grouped, self.ds['x'])
        self.assertDataArrayIdentical(foo, stacked)
        # with an index as the 'dim' argument
        stacked = concat(grouped, self.ds.indexes['x'])
        self.assertDataArrayIdentical(foo, stacked)

        actual = concat([foo[0], foo[1]], pd.Index([0, 1])).reset_coords(drop=True)
        expected = foo[:2].rename({'x': 'concat_dim'})
        self.assertDataArrayIdentical(expected, actual)

        actual = concat([foo[0], foo[1]], [0, 1]).reset_coords(drop=True)
        expected = foo[:2].rename({'x': 'concat_dim'})
        self.assertDataArrayIdentical(expected, actual)

        with self.assertRaisesRegexp(ValueError, 'not identical'):
            concat([foo, bar], compat='identical')

    def test_align(self):
        self.ds['x'] = ('x', np.array(list('abcdefghij')))
        with self.assertRaises(ValueError):
            self.dv + self.dv[:5]
        dv1, dv2 = align(self.dv, self.dv[:5], join='inner')
        self.assertDataArrayIdentical(dv1, self.dv[:5])
        self.assertDataArrayIdentical(dv2, self.dv[:5])

    def test_align_dtype(self):
        # regression test for #264
        x1 = np.arange(30)
        x2 = np.arange(5, 35)
        a = DataArray(np.random.random((30,)).astype('f32'), {'x': x1})
        b = DataArray(np.random.random((30,)).astype('f32'), {'x': x2})
        c, d = align(a, b, join='outer')
        self.assertEqual(c.dtype, np.float32)

    def test_broadcast_arrays(self):
        x = DataArray([1, 2], coords=[('a', [-1, -2])], name='x')
        y = DataArray([1, 2], coords=[('b', [3, 4])], name='y')
        x2, y2 = broadcast_arrays(x, y)
        expected_coords = [('a', [-1, -2]), ('b', [3, 4])]
        expected_x2 = DataArray([[1, 1], [2, 2]], expected_coords, name='x')
        expected_y2 = DataArray([[1, 2], [1, 2]], expected_coords, name='y')
        self.assertDataArrayIdentical(expected_x2, x2)
        self.assertDataArrayIdentical(expected_y2, y2)

        x = DataArray(np.random.randn(2, 3), dims=['a', 'b'])
        y = DataArray(np.random.randn(3, 2), dims=['b', 'a'])
        x2, y2 = broadcast_arrays(x, y)
        expected_x2 = x
        expected_y2 = y.T
        self.assertDataArrayIdentical(expected_x2, x2)
        self.assertDataArrayIdentical(expected_y2, y2)

        with self.assertRaisesRegexp(ValueError, 'cannot broadcast'):
            z = DataArray([1, 2], coords=[('a', [-10, 20])])
            broadcast_arrays(x, z)

    def test_to_pandas(self):
        # 0d
        actual = DataArray(42).to_pandas()
        expected = np.array(42)
        self.assertArrayEqual(actual, expected)

        # 1d
        values = np.random.randn(3)
        index = pd.Index(['a', 'b', 'c'], name='x')
        da = DataArray(values, coords=[index])
        actual = da.to_pandas()
        self.assertArrayEqual(actual.values, values)
        self.assertArrayEqual(actual.index, index)
        self.assertArrayEqual(actual.index.name, 'x')

        # 2d
        values = np.random.randn(3, 2)
        da = DataArray(values, coords=[('x', ['a', 'b', 'c']), ('y', [0, 1])],
                       name='foo')
        actual = da.to_pandas()
        self.assertArrayEqual(actual.values, values)
        self.assertArrayEqual(actual.index, ['a', 'b', 'c'])
        self.assertArrayEqual(actual.columns, [0, 1])

        # roundtrips
        for shape in [(3,), (3, 4), (3, 4, 5)]:
            dims = list('abc')[:len(shape)]
            da = DataArray(np.random.randn(*shape), dims=dims)
            roundtripped = DataArray(da.to_pandas())
            self.assertDataArrayIdentical(da, roundtripped)

        with self.assertRaisesRegexp(ValueError, 'cannot convert'):
            DataArray(np.random.randn(1, 2, 3, 4, 5)).to_pandas()

    def test_to_dataframe(self):
        # regression test for #260
        arr = DataArray(np.random.randn(3, 4),
                        [('B', [1, 2, 3]), ('A', list('cdef'))])
        expected = arr.to_series()
        actual = arr.to_dataframe()[None]
        self.assertArrayEqual(expected.values, actual.values)
        self.assertArrayEqual(expected.index.values, actual.index.values)

    def test_to_and_from_series(self):
        expected = self.dv.to_dataframe()['foo']
        actual = self.dv.to_series()
        self.assertArrayEqual(expected.values, actual.values)
        self.assertArrayEqual(expected.index.values, actual.index.values)
        self.assertEqual('foo', actual.name)
        # test roundtrip
        self.assertDataArrayIdentical(self.dv, DataArray.from_series(actual))
        # test name is None
        actual.name = None
        expected_da = self.dv.rename(None)
        self.assertDataArrayIdentical(expected_da,
                                      DataArray.from_series(actual))

    def test_to_and_from_cdms2(self):
        try:
            import cdms2
        except ImportError:
            raise unittest.SkipTest('cdms2 not installed')

        original = DataArray(np.arange(6).reshape(2, 3),
                             [('distance', [-2, 2], {'units': 'meters'}),
                              ('time', pd.date_range('2000-01-01', periods=3))],
                             name='foo', attrs={'baz': 123})
        expected_coords = [Coordinate('distance', [-2, 2]),
                           Coordinate('time', [0, 1, 2])]
        actual = original.to_cdms2()
        self.assertArrayEqual(actual, original)
        self.assertEqual(actual.id, original.name)
        self.assertItemsEqual(actual.getAxisIds(), original.dims)
        for axis, coord in zip(actual.getAxisList(), expected_coords):
            self.assertEqual(axis.id, coord.name)
            self.assertArrayEqual(axis, coord.values)
        self.assertEqual(actual.baz, original.attrs['baz'])

        component_times = actual.getAxis(1).asComponentTime()
        self.assertEqual(len(component_times), 3)
        self.assertEqual(str(component_times[0]), '2000-1-1 0:0:0.0')

        roundtripped = DataArray.from_cdms2(actual)
        self.assertDataArrayIdentical(original, roundtripped)

    def test_to_dataset(self):
        unnamed = DataArray([1, 2], dims='x')
        actual = unnamed.to_dataset()
        expected = Dataset({None: ('x', [1, 2])})
        self.assertDatasetIdentical(expected, actual)
        self.assertIsNot(unnamed._dataset, actual)

        actual = unnamed.to_dataset('foo')
        expected = Dataset({'foo': ('x', [1, 2])})
        self.assertDatasetIdentical(expected, actual)

        named = DataArray([1, 2], dims='x', name='foo')
        actual = named.to_dataset()
        expected = Dataset({'foo': ('x', [1, 2])})
        self.assertDatasetIdentical(expected, actual)

        actual = named.to_dataset('bar')
        expected = Dataset({'bar': ('x', [1, 2])})
        self.assertDatasetIdentical(expected, actual)
Ejemplo n.º 8
0
class TestDataArray(TestCase):
    def setUp(self):
        self.attrs = {'attr1': 'value1', 'attr2': 2929}
        self.x = np.random.random((10, 20))
        self.v = Variable(['x', 'y'], self.x)
        self.va = Variable(['x', 'y'], self.x, self.attrs)
        self.ds = Dataset({'foo': self.v})
        self.dv = self.ds['foo']

    def test_repr(self):
        v = Variable(['time', 'x'], [[1, 2, 3], [4, 5, 6]], {'foo': 'bar'})
        data_array = DataArray(v, {'other': ([], 0)}, name='my_variable')
        expected = dedent("""\
        <xray.DataArray 'my_variable' (time: 2, x: 3)>
        array([[1, 2, 3],
               [4, 5, 6]])
        Index Coordinates:
            time  (time) int64 0 1
            x     (x) int64 0 1 2
        Other Coordinates:
            other int64 0
        Attributes:
            foo: bar""")
        self.assertEqual(expected, repr(data_array))

    def test_properties(self):
        self.assertVariableEqual(self.dv.variable, self.v)
        self.assertArrayEqual(self.dv.values, self.v.values)
        for attr in ['dims', 'dtype', 'shape', 'size', 'ndim', 'attrs']:
            self.assertEqual(getattr(self.dv, attr), getattr(self.v, attr))
        self.assertEqual(len(self.dv), len(self.v))
        self.assertVariableEqual(self.dv, self.v)
        self.assertItemsEqual(list(self.dv.coords), list(self.ds.coords))
        for k, v in iteritems(self.dv.coords):
            self.assertArrayEqual(v, self.ds.coords[k])
        with self.assertRaises(AttributeError):
            self.dv.dataset = self.ds
        self.assertIsInstance(self.ds['x'].to_index(), pd.Index)
        with self.assertRaisesRegexp(ValueError, 'must be 1-dimensional'):
            self.ds['foo'].to_index()
        with self.assertRaises(AttributeError):
            self.dv.variable = self.v

    def test_name(self):
        arr = self.dv
        self.assertEqual(arr.name, 'foo')

        copied = arr.copy()
        arr.name = 'bar'
        self.assertEqual(arr.name, 'bar')
        self.assertDataArrayEqual(copied, arr)

        actual = DataArray(Coordinate('x', [3]))
        actual.name = 'y'
        expected = DataArray(Coordinate('y', [3]))
        self.assertDataArrayIdentical(actual, expected)

    def test_dims(self):
        arr = self.dv
        self.assertEqual(arr.dims, ('x', 'y'))

        arr.dims = ('w', 'z')
        self.assertEqual(arr.dims, ('w', 'z'))

        x = Dataset({'x': ('x', np.arange(5))})['x']
        x.dims = ('y',)
        self.assertEqual(x.dims, ('y',))
        self.assertEqual(x.name, 'y')

    def test_encoding(self):
        expected = {'foo': 'bar'}
        self.dv.encoding['foo'] = 'bar'
        self.assertEquals(expected, self.dv.encoding)

        expected = {'baz': 0}
        self.dv.encoding = expected
        self.assertEquals(expected, self.dv.encoding)
        self.assertIsNot(expected, self.dv.encoding)

    def test_constructor(self):
        data = np.random.random((2, 3))

        actual = DataArray(data)
        expected = Dataset({None: (['dim_0', 'dim_1'], data)})[None]
        self.assertDataArrayIdentical(expected, actual)

        actual = DataArray(data, [['a', 'b'], [-1, -2, -3]])
        expected = Dataset({None: (['dim_0', 'dim_1'], data),
                            'dim_0': ('dim_0', ['a', 'b']),
                            'dim_1': ('dim_1', [-1, -2, -3])})[None]
        self.assertDataArrayIdentical(expected, actual)

        actual = DataArray(data, [pd.Index(['a', 'b'], name='x'),
                                  pd.Index([-1, -2, -3], name='y')])
        expected = Dataset({None: (['x', 'y'], data),
                            'x': ('x', ['a', 'b']),
                            'y': ('y', [-1, -2, -3])})[None]
        self.assertDataArrayIdentical(expected, actual)

        coords = [['a', 'b'], [-1, -2, -3]]
        actual = DataArray(data, coords, ['x', 'y'])
        self.assertDataArrayIdentical(expected, actual)

        coords = [pd.Index(['a', 'b'], name='A'),
                  pd.Index([-1, -2, -3], name='B')]
        actual = DataArray(data, coords, ['x', 'y'])
        self.assertDataArrayIdentical(expected, actual)

        coords = {'x': ['a', 'b'], 'y': [-1, -2, -3]}
        actual = DataArray(data, coords, ['x', 'y'])
        self.assertDataArrayIdentical(expected, actual)

        coords = [('x', ['a', 'b']), ('y', [-1, -2, -3])]
        actual = DataArray(data, coords)
        self.assertDataArrayIdentical(expected, actual)

        actual = DataArray(data, OrderedDict(coords))
        self.assertDataArrayIdentical(expected, actual)

        expected = Dataset({None: (['x', 'y'], data),
                            'x': ('x', ['a', 'b'])})[None]
        actual = DataArray(data, {'x': ['a', 'b']}, ['x', 'y'])
        self.assertDataArrayIdentical(expected, actual)

        actual = DataArray(data, dims=['x', 'y'])
        expected = Dataset({None: (['x', 'y'], data)})[None]
        self.assertDataArrayIdentical(expected, actual)

        actual = DataArray(data, dims=['x', 'y'], name='foo')
        expected = Dataset({'foo': (['x', 'y'], data)})['foo']
        self.assertDataArrayIdentical(expected, actual)

        actual = DataArray(data, name='foo')
        expected = Dataset({'foo': (['dim_0', 'dim_1'], data)})['foo']
        self.assertDataArrayIdentical(expected, actual)

        actual = DataArray(data, dims=['x', 'y'], attrs={'bar': 2})
        expected = Dataset({None: (['x', 'y'], data, {'bar': 2})})[None]
        self.assertDataArrayIdentical(expected, actual)

        actual = DataArray(data, dims=['x', 'y'], encoding={'bar': 2})
        expected = Dataset({None: (['x', 'y'], data, {}, {'bar': 2})})[None]
        self.assertDataArrayIdentical(expected, actual)

    def test_constructor_invalid(self):
        data = np.random.randn(3, 2)

        with self.assertRaisesRegexp(ValueError, 'coords is not dict-like'):
            DataArray(data, [[0, 1, 2]], ['x', 'y'])

        with self.assertRaisesRegexp(ValueError, 'not a subset of the .* dim'):
            DataArray(data, {'x': [0, 1, 2]}, ['a', 'b'])
        with self.assertRaisesRegexp(ValueError, 'not a subset of the .* dim'):
            DataArray(data, {'x': [0, 1, 2]})

        with self.assertRaisesRegexp(TypeError, 'is not a string'):
            DataArray(data, dims=['x', None])

    def test_constructor_from_self_described(self):
        data = [[-0.1, 21], [0, 2]]
        expected = DataArray(data,
                             coords={'x': ['a', 'b'], 'y': [-1, -2]},
                             dims=['x', 'y'], name='foobar',
                             attrs={'bar': 2}, encoding={'foo': 3})
        actual = DataArray(expected)
        self.assertDataArrayIdentical(expected, actual)

        actual = DataArray(expected.values, actual.coords)
        self.assertDataArrayEqual(expected, actual)

        frame = pd.DataFrame(data, index=pd.Index(['a', 'b'], name='x'),
                             columns=pd.Index([-1, -2], name='y'))
        actual = DataArray(frame)
        self.assertDataArrayEqual(expected, actual)

        series = pd.Series(data[0], index=pd.Index([-1, -2], name='y'))
        actual = DataArray(series)
        self.assertDataArrayEqual(expected[0].reset_coords('x', drop=True),
                                  actual)

        panel = pd.Panel({0: frame})
        actual = DataArray(panel)
        expected = DataArray([data], expected.coords, ['dim_0', 'x', 'y'])
        self.assertDataArrayIdentical(expected, actual)

        expected = DataArray(data,
                             coords={'x': ['a', 'b'], 'y': [-1, -2],
                                     'a': 0, 'z': ('x', [-0.5, 0.5])},
                             dims=['x', 'y'])
        actual = DataArray(expected)
        self.assertDataArrayIdentical(expected, actual)

        actual = DataArray(expected.values, expected.coords)
        self.assertDataArrayIdentical(expected, actual)

        expected = Dataset({'foo': ('foo', ['a', 'b'])})['foo']
        actual = DataArray(pd.Index(['a', 'b'], name='foo'))
        self.assertDataArrayIdentical(expected, actual)

        actual = DataArray(Coordinate('foo', ['a', 'b']))
        self.assertDataArrayIdentical(expected, actual)

        s = pd.Series(range(2), pd.MultiIndex.from_product([['a', 'b'], [0]]))
        with self.assertRaisesRegexp(NotImplementedError, 'MultiIndex'):
            DataArray(s)

    def test_constructor_from_0d(self):
        expected = Dataset({None: ([], 0)})[None]
        actual = DataArray(0)
        self.assertDataArrayIdentical(expected, actual)

    def test_equals_and_identical(self):
        orig = DataArray(np.arange(5.0), {'a': 42}, dims='x')

        expected = orig
        actual = orig.copy()
        self.assertTrue(expected.equals(actual))
        self.assertTrue(expected.identical(actual))

        actual = expected.rename('baz')
        self.assertTrue(expected.equals(actual))
        self.assertFalse(expected.identical(actual))

        actual = expected.rename({'x': 'xxx'})
        self.assertFalse(expected.equals(actual))
        self.assertFalse(expected.identical(actual))

        actual = expected.copy()
        actual.attrs['foo'] = 'bar'
        self.assertTrue(expected.equals(actual))
        self.assertFalse(expected.identical(actual))

        actual = expected.copy()
        actual['x'] = ('x', -np.arange(5))
        self.assertFalse(expected.equals(actual))
        self.assertFalse(expected.identical(actual))

        actual = expected.reset_coords(drop=True)
        self.assertFalse(expected.equals(actual))
        self.assertFalse(expected.identical(actual))

        actual = orig.copy()
        actual[0] = np.nan
        expected = actual.copy()
        self.assertTrue(expected.equals(actual))
        self.assertTrue(expected.identical(actual))

        actual[:] = np.nan
        self.assertFalse(expected.equals(actual))
        self.assertFalse(expected.identical(actual))

        actual = expected.copy()
        actual['a'] = 100000
        self.assertFalse(expected.equals(actual))
        self.assertFalse(expected.identical(actual))

    def test_getitem(self):
        # strings pull out dataarrays
        self.assertDataArrayIdentical(self.dv, self.ds['foo'])
        x = self.dv['x']
        y = self.dv['y']
        self.assertDataArrayIdentical(self.ds['x'], x)
        self.assertDataArrayIdentical(self.ds['y'], y)

        I = ReturnItem()
        for i in [I[:], I[...], I[x.values], I[x.variable], I[x], I[x, y],
                  I[x.values > -1], I[x.variable > -1], I[x > -1],
                  I[x > -1, y > -1]]:
            self.assertVariableEqual(self.dv, self.dv[i])
        for i in [I[0], I[:, 0], I[:3, :2],
                  I[x.values[:3]], I[x.variable[:3]], I[x[:3]], I[x[:3], y[:4]],
                  I[x.values > 3], I[x.variable > 3], I[x > 3], I[x > 3, y > 3]]:
            self.assertVariableEqual(self.v[i], self.dv[i])

    def test_getitem_coords(self):
        orig = DataArray([[10], [20]],
                         {'x': [1, 2], 'y': [3], 'z': 4,
                          'x2': ('x', ['a', 'b']),
                          'y2': ('y', ['c']),
                          'xy': (['y', 'x'], [['d', 'e']])},
                         dims=['x', 'y'])

        self.assertDataArrayIdentical(orig, orig[:])
        self.assertDataArrayIdentical(orig, orig[:, :])
        self.assertDataArrayIdentical(orig, orig[...])
        self.assertDataArrayIdentical(orig, orig[:2, :1])
        self.assertDataArrayIdentical(orig, orig[[0, 1], [0]])

        actual = orig[0, 0]
        expected = DataArray(
            10, {'x': 1, 'y': 3, 'z': 4, 'x2': 'a', 'y2': 'c', 'xy': 'd'})
        self.assertDataArrayIdentical(expected, actual)

        actual = orig[0, :]
        expected = DataArray(
            [10], {'x': 1, 'y': [3], 'z': 4, 'x2': 'a', 'y2': ('y', ['c']),
                   'xy': ('y', ['d'])},
            dims='y')
        self.assertDataArrayIdentical(expected, actual)

        actual = orig[:, 0]
        expected = DataArray(
            [10, 20], {'x': [1, 2], 'y': 3, 'z': 4, 'x2': ('x', ['a', 'b']),
                       'y2': 'c', 'xy': ('x', ['d', 'e'])},
            dims='x')
        self.assertDataArrayIdentical(expected, actual)

    def test_isel(self):
        self.assertDatasetIdentical(self.dv[0].to_dataset(), self.ds.isel(x=0))
        self.assertDatasetIdentical(self.dv[:3, :5].to_dataset(),
                                    self.ds.isel(x=slice(3), y=slice(5)))
        self.assertDataArrayIdentical(self.dv, self.dv.isel(x=slice(None)))
        self.assertDataArrayIdentical(self.dv[:3], self.dv.isel(x=slice(3)))

    def test_sel(self):
        self.ds['x'] = ('x', np.array(list('abcdefghij')))
        da = self.ds['foo']
        self.assertDataArrayIdentical(da, da.sel(x=slice(None)))
        self.assertDataArrayIdentical(da[1], da.sel(x='b'))
        self.assertDataArrayIdentical(da[:3], da.sel(x=slice('c')))
        self.assertDataArrayIdentical(da[:3], da.sel(x=['a', 'b', 'c']))
        self.assertDataArrayIdentical(da[:, :4], da.sel(y=(self.ds['y'] < 4)))

    def test_loc(self):
        self.ds['x'] = ('x', np.array(list('abcdefghij')))
        da = self.ds['foo']
        self.assertDataArrayIdentical(da[:3], da.loc[:'c'])
        self.assertDataArrayIdentical(da[1], da.loc['b'])
        self.assertDataArrayIdentical(da[:3], da.loc[['a', 'b', 'c']])
        self.assertDataArrayIdentical(da[:3, :4],
                                      da.loc[['a', 'b', 'c'], np.arange(4)])
        self.assertDataArrayIdentical(da[:, :4], da.loc[:, self.ds['y'] < 4])
        da.loc['a':'j'] = 0
        self.assertTrue(np.all(da.values == 0))

    def test_loc_single_boolean(self):
        data = DataArray([0, 1], coords=[[True, False]])
        self.assertEqual(data.loc[True], 0)
        self.assertEqual(data.loc[False], 1)

    def test_time_components(self):
        dates = pd.date_range('2000-01-01', periods=10)
        da = DataArray(np.arange(1, 11), [('time', dates)])

        self.assertArrayEqual(da['time.dayofyear'], da.values)
        self.assertArrayEqual(da.coords['time.dayofyear'], da.values)

    def test_coords(self):
        coords = [Coordinate('x', [-1, -2]), Coordinate('y', [0, 1, 2])]
        da = DataArray(np.random.randn(2, 3), coords, name='foo')

        self.assertEquals(2, len(da.coords))

        self.assertEqual(['x', 'y'], list(da.coords))

        self.assertTrue(coords[0].identical(da.coords['x']))
        self.assertTrue(coords[1].identical(da.coords['y']))

        self.assertIn('x', da.coords)
        self.assertNotIn(0, da.coords)
        self.assertNotIn('foo', da.coords)

        with self.assertRaises(KeyError):
            da.coords[0]
        with self.assertRaises(KeyError):
            da.coords['foo']

        expected = dedent("""\
        Index Coordinates:
            x (x) int64 -1 -2
            y (y) int64 0 1 2""")
        actual = repr(da.coords)
        self.assertEquals(expected, actual)

    def test_coord_coords(self):
        orig = DataArray([10, 20],
                         {'x': [1, 2], 'x2': ('x', ['a', 'b']), 'z': 4},
                         dims='x')

        actual = orig.coords['x']
        expected = DataArray([1, 2], {'z': 4, 'x2': ('x', ['a', 'b'])},
                             dims='x', name='x')
        self.assertDataArrayIdentical(expected, actual)

        del actual.coords['x2']
        self.assertDataArrayIdentical(
            expected.reset_coords('x2', drop=True), actual)

        actual.coords['x3'] = ('x', ['a', 'b'])
        expected = DataArray([1, 2], {'z': 4, 'x3': ('x', ['a', 'b'])},
                             dims='x', name='x')
        self.assertDataArrayIdentical(expected, actual)

    def test_reset_coords(self):
        data = DataArray(np.zeros((3, 4)),
                         {'bar': ('x', ['a', 'b', 'c']),
                          'baz': ('y', range(4))},
                         dims=['x', 'y'],
                         name='foo')

        actual = data.reset_coords()
        expected = Dataset({'foo': (['x', 'y'], np.zeros((3, 4))),
                            'bar': ('x', ['a', 'b', 'c']),
                            'baz': ('y', range(4))})
        self.assertDatasetIdentical(actual, expected)

        actual = data.reset_coords(['bar', 'baz'])
        self.assertDatasetIdentical(actual, expected)

        actual = data.reset_coords('bar')
        expected = Dataset({'foo': (['x', 'y'], np.zeros((3, 4))),
                            'bar': ('x', ['a', 'b', 'c'])},
                           {'baz': ('y', range(4))})
        self.assertDatasetIdentical(actual, expected)

        actual = data.reset_coords(['bar'])
        self.assertDatasetIdentical(actual, expected)

        actual = data.reset_coords(drop=True)
        expected = DataArray(np.zeros((3, 4)), dims=['x', 'y'], name='foo')
        self.assertDataArrayIdentical(actual, expected)

        actual = data.copy()
        actual.reset_coords(drop=True, inplace=True)
        self.assertDataArrayIdentical(actual, expected)

        actual = data.reset_coords('bar', drop=True)
        expected = DataArray(np.zeros((3, 4)), {'baz': ('y', range(4))},
                             dims=['x', 'y'], name='foo')
        self.assertDataArrayIdentical(actual, expected)

        with self.assertRaisesRegexp(ValueError, 'cannot reset coord'):
            data.reset_coords(inplace=True)
        with self.assertRaises(KeyError):
            data.reset_coords('foo', drop=True)
        with self.assertRaisesRegexp(ValueError, 'cannot be found'):
            data.reset_coords('not_found')
        with self.assertRaisesRegexp(ValueError, 'cannot remove index'):
            data.reset_coords('y')

    def test_reindex(self):
        foo = self.dv
        bar = self.dv[:2, :2]
        self.assertDataArrayIdentical(foo.reindex_like(bar), bar)

        expected = foo.copy()
        expected[:] = np.nan
        expected[:2, :2] = bar
        self.assertDataArrayIdentical(bar.reindex_like(foo), expected)

    def test_rename(self):
        renamed = self.dv.rename('bar')
        self.assertDatasetIdentical(
            renamed.to_dataset(), self.ds.rename({'foo': 'bar'}))
        self.assertEqual(renamed.name, 'bar')

        renamed = self.dv.rename({'foo': 'bar'})
        self.assertDatasetIdentical(
            renamed.to_dataset(), self.ds.rename({'foo': 'bar'}))
        self.assertEqual(renamed.name, 'bar')

    def test_dataset_getitem(self):
        dv = self.ds['foo']
        self.assertDataArrayIdentical(dv, self.dv)

    def test_array_interface(self):
        self.assertArrayEqual(np.asarray(self.dv), self.x)
        # test patched in methods
        self.assertArrayEqual(self.dv.astype(float), self.v.astype(float))
        self.assertVariableEqual(self.dv.argsort(), self.v.argsort())
        self.assertVariableEqual(self.dv.clip(2, 3), self.v.clip(2, 3))
        # test ufuncs
        expected = deepcopy(self.ds)
        expected['foo'][:] = np.sin(self.x)
        self.assertDataArrayEqual(expected['foo'], np.sin(self.dv))
        self.assertDataArrayEqual(self.dv, np.maximum(self.v, self.dv))
        bar = Variable(['x', 'y'], np.zeros((10, 20)))
        self.assertDataArrayEqual(self.dv, np.maximum(self.dv, bar))

    def test_is_null(self):
        x = np.random.RandomState(42).randn(5, 6)
        x[x < 0] = np.nan
        original = DataArray(x, [-np.arange(5), np.arange(6)], ['x', 'y'])
        expected = DataArray(pd.isnull(x), [-np.arange(5), np.arange(6)],
                             ['x', 'y'])
        self.assertDataArrayIdentical(expected, original.isnull())
        self.assertDataArrayIdentical(~expected, original.notnull())

    def test_math(self):
        x = self.x
        v = self.v
        a = self.dv
        # variable math was already tested extensively, so let's just make sure
        # that all types are properly converted here
        self.assertDataArrayEqual(a, +a)
        self.assertDataArrayEqual(a, a + 0)
        self.assertDataArrayEqual(a, 0 + a)
        self.assertDataArrayEqual(a, a + 0 * v)
        self.assertDataArrayEqual(a, 0 * v + a)
        self.assertDataArrayEqual(a, a + 0 * x)
        self.assertDataArrayEqual(a, 0 * x + a)
        self.assertDataArrayEqual(a, a + 0 * a)
        self.assertDataArrayEqual(a, 0 * a + a)
        # test different indices
        b = a.copy()
        b.coords['x'] = 3 + np.arange(10)
        with self.assertRaisesRegexp(ValueError, 'not aligned'):
            a + b
        with self.assertRaisesRegexp(ValueError, 'not aligned'):
            b + a

    def test_inplace_math_basics(self):
        x = self.x
        v = self.v
        a = self.dv
        b = a
        b += 1
        self.assertIs(b, a)
        self.assertIs(b.variable, v)
        self.assertArrayEqual(b.values, x)
        self.assertIs(source_ndarray(b.values), x)
        self.assertDatasetIdentical(b._dataset, self.ds)

    def test_math_name(self):
        # Verify that name is preserved only when it can be done unambiguously.
        # The rule (copied from pandas.Series) is keep the current name only if
        # the other object has the same name or no name attribute and this
        # object isn't a coordinate; otherwise reset to None.
        a = self.dv
        self.assertEqual((+a).name, 'foo')
        self.assertEqual((a + 0).name, 'foo')
        self.assertIs((a + a.rename(None)).name, None)
        self.assertIs((a + a.rename('bar')).name, None)
        self.assertEqual((a + a).name, 'foo')
        self.assertIs((+a['x']).name, None)
        self.assertIs((a['x'] + 0).name, None)
        self.assertIs((a + a['x']).name, None)

    def test_math_with_coords(self):
        coords = {'x': [-1, -2], 'y': ['ab', 'cd', 'ef'],
                  'lat': (['x', 'y'], [[1, 2, 3], [-1, -2, -3]]),
                  'c': -999}
        orig = DataArray(np.random.randn(2, 3), coords, dims=['x', 'y'])

        actual = orig + 1
        expected = DataArray(orig.values + 1, orig.coords)
        self.assertDataArrayIdentical(expected, actual)

        actual = 1 + orig
        self.assertDataArrayIdentical(expected, actual)

        actual = orig + orig[0, 0]
        exp_coords = dict((k, v) for k, v in coords.items() if k != 'lat')
        expected = DataArray(orig.values + orig.values[0, 0],
                             exp_coords, dims=['x', 'y'])
        self.assertDataArrayIdentical(expected, actual)

        actual = orig[0, 0] + orig
        self.assertDataArrayIdentical(expected, actual)

        actual = orig[0, 0] + orig[-1, -1]
        expected = DataArray(orig.values[0, 0] + orig.values[-1, -1],
                             {'c': -999})
        self.assertDataArrayIdentical(expected, actual)

        actual = orig[:, 0] + orig[0, :]
        exp_values = orig[:, 0].values[:, None] + orig[0, :].values[None, :]
        expected = DataArray(exp_values, exp_coords, dims=['x', 'y'])
        self.assertDataArrayIdentical(expected, actual)

        actual = orig[0, :] + orig[:, 0]
        self.assertDataArrayIdentical(expected.T, actual)

        actual = orig - orig.T
        expected = DataArray(np.zeros((2, 3)), orig.coords)
        self.assertDataArrayIdentical(expected, actual)

        actual = orig.T - orig
        self.assertDataArrayIdentical(expected.T, actual)

        alt = DataArray([1, 1], {'x': [-1, -2], 'c': 'foo', 'd': 555}, 'x')
        actual = orig + alt
        expected = orig + 1
        expected.coords['d'] = 555
        del expected.coords['c']
        self.assertDataArrayIdentical(expected, actual)

        actual = alt + orig
        self.assertDataArrayIdentical(expected, actual)

    def test_index_math(self):
        orig = DataArray(range(3), dims='x', name='x')
        actual = orig + 1
        expected = DataArray(1 + np.arange(3), coords=[('x', range(3))])
        self.assertDataArrayIdentical(expected, actual)

    def test_dataset_math(self):
        # more comprehensive tests with multiple dataset variables
        obs = Dataset({'tmin': ('x', np.arange(5)),
                       'tmax': ('x', 10 + np.arange(5))},
                      {'x': ('x', 0.5 * np.arange(5)),
                       'loc': ('x', range(-2, 3))})

        actual = 2 * obs['tmax']
        expected = DataArray(2 * (10 + np.arange(5)), obs.coords, name='tmax')
        self.assertDataArrayIdentical(actual, expected)

        actual = obs['tmax'] - obs['tmin']
        expected = DataArray(10 * np.ones(5), obs.coords)
        self.assertDataArrayIdentical(actual, expected)

        sim = Dataset({'tmin': ('x', 1 + np.arange(5)),
                       'tmax': ('x', 11 + np.arange(5)),
                       # does *not* include 'loc' as a coordinate
                       'x': ('x', 0.5 * np.arange(5))})

        actual = sim['tmin'] - obs['tmin']
        expected = DataArray(np.ones(5), obs.coords, name='tmin')
        self.assertDataArrayIdentical(actual, expected)

        actual = -obs['tmin'] + sim['tmin']
        self.assertDataArrayIdentical(actual, expected)

        actual = sim['tmin'].copy()
        actual -= obs['tmin']
        self.assertDataArrayIdentical(actual, expected)

        actual = sim.copy()
        actual['tmin'] = sim['tmin'] - obs['tmin']
        expected = Dataset({'tmin': ('x', np.ones(5)),
                            'tmax': ('x', sim['tmax'].values)},
                            obs.coords)
        self.assertDatasetIdentical(actual, expected)

        actual = sim.copy()
        actual['tmin'] -= obs['tmin']
        self.assertDatasetIdentical(actual, expected)

    def test_transpose(self):
        self.assertVariableEqual(self.dv.variable.transpose(),
                                 self.dv.transpose())

    def test_squeeze(self):
        self.assertVariableEqual(self.dv.variable.squeeze(), self.dv.squeeze())

    def test_reduce(self):
        coords = {'x': [-1, -2], 'y': ['ab', 'cd', 'ef'],
                  'lat': (['x', 'y'], [[1, 2, 3], [-1, -2, -3]]),
                  'c': -999}
        orig = DataArray([[-1, 0, 1], [-3, 0, 3]], coords, dims=['x', 'y'])

        actual = orig.mean()
        expected = DataArray(0, {'c': -999})
        self.assertDataArrayIdentical(expected, actual)

        actual = orig.mean(['x', 'y'])
        self.assertDataArrayIdentical(expected, actual)

        actual = orig.mean('x')
        expected = DataArray([-2, 0, 2], {'y': coords['y'], 'c': -999}, 'y')
        self.assertDataArrayIdentical(expected, actual)

        actual = orig.mean(['x'])
        self.assertDataArrayIdentical(expected, actual)

        actual = orig.mean('y')
        expected = DataArray([0, 0], {'x': coords['x'], 'c': -999}, 'x')
        self.assertDataArrayIdentical(expected, actual)

        self.assertVariableEqual(self.dv.reduce(np.mean, 'x'),
                                 self.v.reduce(np.mean, 'x'))

    def test_reduce_keep_attrs(self):
        # Test dropped attrs
        vm = self.va.mean()
        self.assertEqual(len(vm.attrs), 0)
        self.assertEqual(vm.attrs, OrderedDict())

        # Test kept attrs
        vm = self.va.mean(keep_attrs=True)
        self.assertEqual(len(vm.attrs), len(self.attrs))
        self.assertEqual(vm.attrs, self.attrs)

    def test_groupby_iter(self):
        for ((act_x, act_dv), (exp_x, exp_ds)) in \
                zip(self.dv.groupby('y'), self.ds.groupby('y')):
            self.assertEqual(exp_x, act_x)
            self.assertDataArrayIdentical(exp_ds['foo'], act_dv)
        for ((_, exp_dv), act_dv) in zip(self.dv.groupby('x'), self.dv):
            self.assertDataArrayIdentical(exp_dv, act_dv)

    def make_groupby_example_array(self):
        da = self.dv.copy()
        da.coords['abc'] = ('y', np.array(['a'] * 9 + ['c'] + ['b'] * 10))
        da.coords['y'] = 20 + 100 * da['y']
        return da

    def test_groupby_properties(self):
        grouped = self.make_groupby_example_array().groupby('abc')
        expected_unique = Variable('abc', ['a', 'b', 'c'])
        self.assertVariableEqual(expected_unique, grouped.unique_coord)
        self.assertEqual(3, len(grouped))

    def test_groupby_apply_identity(self):
        expected = self.make_groupby_example_array()
        idx = expected.coords['y']
        identity = lambda x: x
        for g in ['x', 'y', 'abc', idx]:
            for shortcut in [False, True]:
                for squeeze in [False, True]:
                    grouped = expected.groupby(g, squeeze=squeeze)
                    actual = grouped.apply(identity, shortcut=shortcut)
                    self.assertDataArrayIdentical(expected, actual)

    def test_groupby_sum(self):
        array = self.make_groupby_example_array()
        grouped = array.groupby('abc')

        expected_sum_all = Dataset(
            {'foo': Variable(['abc'], np.array([self.x[:, :9].sum(),
                                                self.x[:, 10:].sum(),
                                                self.x[:, 9:10].sum()]).T),
             'abc': Variable(['abc'], np.array(['a', 'b', 'c']))})['foo']
        self.assertDataArrayAllClose(expected_sum_all, grouped.reduce(np.sum))
        self.assertDataArrayAllClose(expected_sum_all, grouped.sum())

        expected = DataArray([array['y'].values[idx].sum() for idx
                              in [slice(9), slice(10, None), slice(9, 10)]],
                             [['a', 'b', 'c']], ['abc'])
        actual = array['y'].groupby('abc').apply(np.sum)
        self.assertDataArrayAllClose(expected, actual)
        actual = array['y'].groupby('abc').sum()
        self.assertDataArrayAllClose(expected, actual)

        expected_sum_axis1 = Dataset(
            {'foo': (['x', 'abc'], np.array([self.x[:, :9].sum(1),
                                             self.x[:, 10:].sum(1),
                                             self.x[:, 9:10].sum(1)]).T),
             'x': self.ds.variables['x'],
             'abc': Variable(['abc'], np.array(['a', 'b', 'c']))})['foo']
        self.assertDataArrayAllClose(expected_sum_axis1,
                                     grouped.reduce(np.sum, 'y'))
        self.assertDataArrayAllClose(expected_sum_axis1, grouped.sum('y'))

    def test_groupby_apply_center(self):
        def center(x):
            return x - np.mean(x)

        array = self.make_groupby_example_array()
        grouped = array.groupby('abc')

        expected_ds = array.to_dataset()
        exp_data = np.hstack([center(self.x[:, :9]),
                              center(self.x[:, 9:10]),
                              center(self.x[:, 10:])])
        expected_ds['foo'] = (['x', 'y'], exp_data)
        expected_centered = expected_ds['foo']
        self.assertDataArrayAllClose(expected_centered, grouped.apply(center))

    def test_concat(self):
        self.ds['bar'] = Variable(['x', 'y'], np.random.randn(10, 20))
        foo = self.ds['foo']
        bar = self.ds['bar']
        # from dataset array:
        expected = DataArray(np.array([foo.values, bar.values]),
                             dims=['w', 'x', 'y'])
        actual = concat([foo, bar], 'w')
        self.assertDataArrayEqual(expected, actual)
        # from iteration:
        grouped = [g for _, g in foo.groupby('x')]
        stacked = concat(grouped, self.ds['x'])
        self.assertDataArrayIdentical(foo, stacked)

        with self.assertRaisesRegexp(ValueError, 'not identical'):
            concat([foo, bar], compat='identical')

    def test_align(self):
        self.ds['x'] = ('x', np.array(list('abcdefghij')))
        with self.assertRaises(ValueError):
            self.dv + self.dv[:5]
        dv1, dv2 = align(self.dv, self.dv[:5], join='inner')
        self.assertDataArrayIdentical(dv1, self.dv[:5])
        self.assertDataArrayIdentical(dv2, self.dv[:5])

    def test_to_and_from_series(self):
        expected = self.dv.to_dataframe()['foo']
        actual = self.dv.to_series()
        self.assertArrayEqual(expected.values, actual.values)
        self.assertArrayEqual(expected.index.values, actual.index.values)
        self.assertEqual('foo', actual.name)
        # test roundtrip
        self.assertDataArrayIdentical(self.dv, DataArray.from_series(actual))
        # test name is None
        actual.name = None
        expected_da = self.dv.rename(None)
        self.assertDataArrayIdentical(expected_da,
                                      DataArray.from_series(actual))

    def test_to_dataset(self):
        unnamed = DataArray([1, 2], dims='x')
        actual = unnamed.to_dataset()
        expected = Dataset({None: ('x', [1, 2])})
        self.assertDatasetIdentical(expected, actual)
        self.assertIsNot(unnamed._dataset, actual)

        actual = unnamed.to_dataset('foo')
        expected = Dataset({'foo': ('x', [1, 2])})
        self.assertDatasetIdentical(expected, actual)

        named = DataArray([1, 2], dims='x', name='foo')
        actual = named.to_dataset()
        expected = Dataset({'foo': ('x', [1, 2])})
        self.assertDatasetIdentical(expected, actual)

        actual = named.to_dataset('bar')
        expected = Dataset({'bar': ('x', [1, 2])})
        self.assertDatasetIdentical(expected, actual)
Ejemplo n.º 9
0
class TestDataArray(TestCase):
    def setUp(self):
        self.attrs = {'attr1': 'value1', 'attr2': 2929}
        self.x = np.random.random((10, 20))
        self.v = Variable(['x', 'y'], self.x)
        self.va = Variable(['x', 'y'], self.x, self.attrs)
        self.ds = Dataset({'foo': self.v})
        self.dv = self.ds['foo']

    def test_repr(self):
        v = Variable(['time', 'x'], [[1, 2, 3], [4, 5, 6]], {'foo': 'bar'})
        data_array = Dataset({
            'my_variable': v,
            'other': ([], 0)
        })['my_variable']
        expected = dedent("""
        <xray.DataArray 'my_variable' (time: 2, x: 3)>
        array([[1, 2, 3],
               [4, 5, 6]])
        Coordinates:
            time: Int64Index([0, 1], dtype='int64')
            x: Int64Index([0, 1, 2], dtype='int64')
        Linked dataset variables:
            other
        Attributes:
            foo: bar
        """).strip()
        self.assertEqual(expected, repr(data_array))

    def test_properties(self):
        self.assertDatasetIdentical(self.dv.dataset, self.ds)
        self.assertEqual(self.dv.name, 'foo')
        self.assertVariableEqual(self.dv.variable, self.v)
        self.assertArrayEqual(self.dv.values, self.v.values)
        for attr in ['dimensions', 'dtype', 'shape', 'size', 'ndim', 'attrs']:
            self.assertEqual(getattr(self.dv, attr), getattr(self.v, attr))
        self.assertEqual(len(self.dv), len(self.v))
        self.assertVariableEqual(self.dv, self.v)
        self.assertEqual(list(self.dv.coordinates), list(self.ds.coordinates))
        for k, v in iteritems(self.dv.coordinates):
            self.assertArrayEqual(v, self.ds.coordinates[k])
        with self.assertRaises(AttributeError):
            self.dv.name = 'bar'
        with self.assertRaises(AttributeError):
            self.dv.dataset = self.ds
        self.assertIsInstance(self.ds['x'].as_index, pd.Index)
        with self.assertRaisesRegexp(ValueError, 'must be 1-dimensional'):
            self.ds['foo'].as_index

    def test_constructor(self):
        data = np.random.random((2, 3))

        actual = DataArray(data)
        expected = Dataset({None: (['dim_0', 'dim_1'], data)})[None]
        self.assertDataArrayIdentical(expected, actual)

        actual = DataArray(data, [['a', 'b'], [-1, -2, -3]])
        expected = Dataset({
            None: (['dim_0', 'dim_1'], data),
            'dim_0': ('dim_0', ['a', 'b']),
            'dim_1': ('dim_1', [-1, -2, -3])
        })[None]
        self.assertDataArrayIdentical(expected, actual)

        actual = DataArray(
            data,
            [pd.Index(['a', 'b'], name='x'),
             pd.Index([-1, -2, -3], name='y')])
        expected = Dataset({
            None: (['x', 'y'], data),
            'x': ('x', ['a', 'b']),
            'y': ('y', [-1, -2, -3])
        })[None]
        self.assertDataArrayIdentical(expected, actual)

        indexes = [['a', 'b'], [-1, -2, -3]]
        actual = DataArray(data, indexes, ['x', 'y'])
        self.assertDataArrayIdentical(expected, actual)

        indexes = [
            pd.Index(['a', 'b'], name='A'),
            pd.Index([-1, -2, -3], name='B')
        ]
        actual = DataArray(data, indexes, ['x', 'y'])
        self.assertDataArrayIdentical(expected, actual)

        indexes = {'x': ['a', 'b'], 'y': [-1, -2, -3]}
        actual = DataArray(data, indexes, ['x', 'y'])
        self.assertDataArrayIdentical(expected, actual)

        indexes = OrderedDict([('x', ['a', 'b']), ('y', [-1, -2, -3])])
        actual = DataArray(data, indexes)
        self.assertDataArrayIdentical(expected, actual)

        expected = Dataset({
            None: (['x', 'y'], data),
            'x': ('x', ['a', 'b'])
        })[None]
        actual = DataArray(data, {'x': ['a', 'b']}, ['x', 'y'])
        self.assertDataArrayIdentical(expected, actual)

        with self.assertRaisesRegexp(ValueError, 'but data has ndim'):
            DataArray(data, [[0, 1, 2]], ['x', 'y'])

        with self.assertRaisesRegexp(ValueError, 'not array dimensions'):
            DataArray(data, {'x': [0, 1, 2]}, ['a', 'b'])

        with self.assertRaisesRegexp(ValueError, 'must have the same length'):
            DataArray(data, {'x': [0, 1, 2]})

        actual = DataArray(data, dimensions=['x', 'y'])
        expected = Dataset({None: (['x', 'y'], data)})[None]
        self.assertDataArrayIdentical(expected, actual)

        actual = DataArray(data, dimensions=['x', 'y'], name='foo')
        expected = Dataset({'foo': (['x', 'y'], data)})['foo']
        self.assertDataArrayIdentical(expected, actual)

        with self.assertRaisesRegexp(TypeError, 'is not a string'):
            DataArray(data, dimensions=['x', None])

        actual = DataArray(data, name='foo')
        expected = Dataset({'foo': (['dim_0', 'dim_1'], data)})['foo']
        self.assertDataArrayIdentical(expected, actual)

        actual = DataArray(data, dimensions=['x', 'y'], attributes={'bar': 2})
        expected = Dataset({None: (['x', 'y'], data, {'bar': 2})})[None]
        self.assertDataArrayIdentical(expected, actual)

        actual = DataArray(data, dimensions=['x', 'y'], encoding={'bar': 2})
        expected = Dataset({None: (['x', 'y'], data, {}, {'bar': 2})})[None]
        self.assertDataArrayIdentical(expected, actual)

    def test_constructor_from_self_described(self):
        data = [[-0.1, 21], [0, 2]]
        expected = DataArray(data,
                             indexes={
                                 'x': ['a', 'b'],
                                 'y': [-1, -2]
                             },
                             dimensions=['x', 'y'],
                             name='foobar',
                             attributes={'bar': 2},
                             encoding={'foo': 3})
        actual = DataArray(expected)
        self.assertDataArrayIdentical(expected, actual)

        frame = pd.DataFrame(data,
                             index=pd.Index(['a', 'b'], name='x'),
                             columns=pd.Index([-1, -2], name='y'))
        actual = DataArray(frame)
        self.assertDataArrayEqual(expected, actual)

        series = pd.Series(data[0], index=pd.Index([-1, -2], name='y'))
        actual = DataArray(series)
        self.assertDataArrayEqual(expected[0], actual)

        panel = pd.Panel({0: frame})
        actual = DataArray(panel)
        expected = DataArray([data], expected.coordinates, ['dim_0', 'x', 'y'])
        self.assertDataArrayIdentical(expected, actual)

        expected = DataArray(['a', 'b'], name='foo')
        actual = DataArray(pd.Index(['a', 'b'], name='foo'))
        self.assertDataArrayIdentical(expected, actual)

    def test_equals_and_identical(self):
        da2 = self.dv.copy()
        self.assertTrue(self.dv.equals(da2))
        self.assertTrue(self.dv.identical(da2))

        da3 = self.dv.rename('baz')
        self.assertTrue(self.dv.equals(da3))
        self.assertFalse(self.dv.identical(da3))

        da4 = self.dv.rename({'x': 'xxx'})
        self.assertFalse(self.dv.equals(da4))
        self.assertFalse(self.dv.identical(da4))

        da5 = self.dv.copy()
        da5.attrs['foo'] = 'bar'
        self.assertTrue(self.dv.equals(da5))
        self.assertFalse(self.dv.identical(da5))

        da6 = self.dv.copy()
        da6['x'] = ('x', -np.arange(10))
        self.assertFalse(self.dv.equals(da6))
        self.assertFalse(self.dv.identical(da6))

        da2[0, 0] = np.nan
        self.dv[0, 0] = np.nan
        self.assertTrue(self.dv.equals(da2))
        self.assertTrue(self.dv.identical(da2))

        da2[:] = np.nan
        self.assertFalse(self.dv.equals(da2))
        self.assertFalse(self.dv.identical(da2))

    def test_items(self):
        # strings pull out dataarrays
        self.assertDataArrayIdentical(self.dv, self.ds['foo'])
        x = self.dv['x']
        y = self.dv['y']
        self.assertDataArrayIdentical(self.ds['x'], x)
        self.assertDataArrayIdentical(self.ds['y'], y)
        # integer indexing
        I = ReturnItem()
        for i in [
                I[:], I[...], I[x.values], I[x.variable], I[x], I[x, y],
                I[x.values > -1], I[x.variable > -1], I[x > -1], I[x > -1,
                                                                   y > -1]
        ]:
            self.assertVariableEqual(self.dv, self.dv[i])
        for i in [
                I[0], I[:, 0], I[:3, :2], I[x.values[:3]], I[x.variable[:3]],
                I[x[:3]], I[x[:3], y[:4]], I[x.values > 3], I[x.variable > 3],
                I[x > 3], I[x > 3, y > 3]
        ]:
            self.assertVariableEqual(self.v[i], self.dv[i])
        # make sure we always keep the array around, even if it's a scalar
        self.assertVariableEqual(self.dv[0, 0], self.dv.variable[0, 0])
        for k in ['x', 'y', 'foo']:
            self.assertIn(k, self.dv[0, 0].dataset)

    def test_indexed(self):
        self.assertEqual(self.dv[0].dataset, self.ds.indexed(x=0))
        self.assertEqual(self.dv[:3, :5].dataset,
                         self.ds.indexed(x=slice(3), y=slice(5)))
        self.assertDataArrayIdentical(self.dv, self.dv.indexed(x=slice(None)))
        self.assertDataArrayIdentical(self.dv[:3], self.dv.indexed(x=slice(3)))

    def test_labeled(self):
        self.ds['x'] = ('x', np.array(list('abcdefghij')))
        da = self.ds['foo']
        self.assertDataArrayIdentical(da, da.labeled(x=slice(None)))
        self.assertDataArrayIdentical(da[1], da.labeled(x='b'))
        self.assertDataArrayIdentical(da[:3], da.labeled(x=slice('c')))

    def test_loc(self):
        self.ds['x'] = ('x', np.array(list('abcdefghij')))
        da = self.ds['foo']
        self.assertDataArrayIdentical(da[:3], da.loc[:'c'])
        self.assertDataArrayIdentical(da[1], da.loc['b'])
        self.assertDataArrayIdentical(da[:3], da.loc[['a', 'b', 'c']])
        self.assertDataArrayIdentical(da[:3, :4], da.loc[['a', 'b', 'c'],
                                                         np.arange(4)])
        da.loc['a':'j'] = 0
        self.assertTrue(np.all(da.values == 0))

    def test_reindex(self):
        foo = self.dv
        bar = self.dv[:2, :2]
        self.assertDataArrayIdentical(foo.reindex_like(bar), bar)

        expected = foo.copy()
        expected[:] = np.nan
        expected[:2, :2] = bar
        self.assertDataArrayIdentical(bar.reindex_like(foo), expected)

    def test_rename(self):
        renamed = self.dv.rename('bar')
        self.assertEqual(renamed.dataset, self.ds.rename({'foo': 'bar'}))
        self.assertEqual(renamed.name, 'bar')

        renamed = self.dv.rename({'foo': 'bar'})
        self.assertEqual(renamed.dataset, self.ds.rename({'foo': 'bar'}))
        self.assertEqual(renamed.name, 'bar')

    def test_dataset_getitem(self):
        dv = self.ds['foo']
        self.assertDataArrayIdentical(dv, self.dv)

    def test_array_interface(self):
        self.assertArrayEqual(np.asarray(self.dv), self.x)
        # test patched in methods
        self.assertArrayEqual(self.dv.astype(float), self.v.astype(float))
        self.assertVariableEqual(self.dv.argsort(), self.v.argsort())
        self.assertVariableEqual(self.dv.clip(2, 3), self.v.clip(2, 3))
        # test ufuncs
        expected = deepcopy(self.ds)
        expected['foo'][:] = np.sin(self.x)
        self.assertDataArrayEqual(expected['foo'], np.sin(self.dv))
        self.assertDataArrayEqual(self.dv, np.maximum(self.v, self.dv))
        bar = Variable(['x', 'y'], np.zeros((10, 20)))
        self.assertDataArrayEqual(self.dv, np.maximum(self.dv, bar))

    def test_math(self):
        x = self.x
        v = self.v
        a = self.dv
        # variable math was already tested extensively, so let's just make sure
        # that all types are properly converted here
        self.assertDataArrayEqual(a, +a)
        self.assertDataArrayEqual(a, a + 0)
        self.assertDataArrayEqual(a, 0 + a)
        self.assertDataArrayEqual(a, a + 0 * v)
        self.assertDataArrayEqual(a, 0 * v + a)
        self.assertDataArrayEqual(a, a + 0 * x)
        self.assertDataArrayEqual(a, 0 * x + a)
        self.assertDataArrayEqual(a, a + 0 * a)
        self.assertDataArrayEqual(a, 0 * a + a)
        # test different indices
        ds2 = self.ds.update({'x': ('x', 3 + np.arange(10))}, inplace=False)
        b = ds2['foo']
        with self.assertRaisesRegexp(ValueError, 'not aligned'):
            a + b
        with self.assertRaisesRegexp(ValueError, 'not aligned'):
            b + a
        with self.assertRaisesRegexp(TypeError, 'datasets do not support'):
            a + a.dataset

    def test_dataset_math(self):
        # verify that mathematical operators keep around the expected variables
        # when doing math with dataset arrays from one or more aligned datasets
        obs = Dataset({
            'tmin': ('x', np.arange(5)),
            'tmax': ('x', 10 + np.arange(5)),
            'x': ('x', 0.5 * np.arange(5))
        })

        actual = 2 * obs['tmax']
        expected = Dataset({
            'tmax2': ('x', 2 * (10 + np.arange(5))),
            'x': obs['x']
        })['tmax2']
        self.assertDataArrayEqual(actual, expected)

        actual = obs['tmax'] - obs['tmin']
        expected = Dataset({
            'trange': ('x', 10 * np.ones(5)),
            'x': obs['x']
        })['trange']
        self.assertDataArrayEqual(actual, expected)

        sim = Dataset({
            'tmin': ('x', 1 + np.arange(5)),
            'tmax': ('x', 11 + np.arange(5)),
            'x': ('x', 0.5 * np.arange(5))
        })

        actual = sim['tmin'] - obs['tmin']
        expected = Dataset({
            'error': ('x', np.ones(5)),
            'x': obs['x']
        })['error']
        self.assertDataArrayEqual(actual, expected)

        # in place math shouldn't remove or conflict with other variables
        actual = deepcopy(sim['tmin'])
        actual -= obs['tmin']
        expected = Dataset({
            'tmin': ('x', np.ones(5)),
            'tmax': sim['tmax'],
            'x': sim['x']
        })['tmin']
        self.assertDataArrayEqual(actual, expected)

    def test_math_name(self):
        # Verify that name is preserved only when it can be done unambiguously.
        # The rule (copied from pandas.Series) is keep the current name only if
        # the other object has no name attribute and this object isn't a
        # coordinate; otherwise reset to None.
        ds = self.ds
        a = self.dv
        self.assertEqual((+a).name, 'foo')
        self.assertEqual((a + 0).name, 'foo')
        self.assertIs((a + a.rename(None)).name, None)
        self.assertIs((a + a).name, None)
        self.assertIs((+ds['x']).name, None)
        self.assertIs((ds['x'] + 0).name, None)
        self.assertIs((a + ds['x']).name, None)

    def test_coord_math(self):
        ds = Dataset({'x': ('x', 1 + np.arange(3))})
        expected = ds.copy()
        expected['x2'] = ('x', np.arange(3))
        actual = ds['x'] - 1
        self.assertDataArrayEqual(expected['x2'], actual)

    def test_item_math(self):
        self.ds['x'] = ('x', np.array(list('abcdefghij')))
        self.assertVariableEqual(self.dv + self.dv[0, 0],
                                 self.dv + self.dv[0, 0].values)
        new_data = self.x[0][None, :] + self.x[:, 0][:, None]
        self.assertVariableEqual(self.dv[:, 0] + self.dv[0],
                                 Variable(['x', 'y'], new_data))
        self.assertVariableEqual(self.dv[0] + self.dv[:, 0],
                                 Variable(['y', 'x'], new_data.T))

    def test_inplace_math(self):
        x = self.x
        v = self.v
        a = self.dv
        b = a
        b += 1
        self.assertIs(b, a)
        self.assertIs(b.variable, v)
        self.assertArrayEqual(b.values, x)
        self.assertIs(source_ndarray(b.values), x)
        self.assertDatasetIdentical(b.dataset, self.ds)

    def test_transpose(self):
        self.assertVariableEqual(self.dv.variable.transpose(),
                                 self.dv.transpose())

    def test_squeeze(self):
        self.assertVariableEqual(self.dv.variable.squeeze(), self.dv.squeeze())

    def test_reduce(self):
        self.assertVariableEqual(self.dv.reduce(np.mean, 'x'),
                                 self.v.reduce(np.mean, 'x'))
        # needs more...
        # should check which extra dimensions are dropped

    def test_reduce_keep_attrs(self):
        # Test dropped attrs
        vm = self.va.mean()
        self.assertEqual(len(vm.attrs), 0)
        self.assertEqual(vm.attrs, OrderedDict())

        # Test kept attrs
        vm = self.va.mean(keep_attrs=True)
        self.assertEqual(len(vm.attrs), len(self.attrs))
        self.assertEqual(vm.attrs, self.attrs)

    def test_unselect(self):
        with self.assertRaisesRegexp(ValueError, 'cannot unselect the name'):
            self.dv.unselect('foo')
        with self.assertRaisesRegexp(ValueError, 'must be a variable in'):
            self.dv.unselect('y')

    def test_groupby_iter(self):
        for ((act_x, act_dv), (exp_x, exp_ds)) in \
                zip(self.dv.groupby('y'), self.ds.groupby('y')):
            self.assertEqual(exp_x, act_x)
            self.assertDataArrayIdentical(exp_ds['foo'], act_dv)
        for ((_, exp_dv), act_dv) in zip(self.dv.groupby('x'), self.dv):
            self.assertDataArrayIdentical(exp_dv, act_dv)

    def test_groupby(self):
        agg_var = Variable(['y'], np.array(['a'] * 9 + ['c'] + ['b'] * 10))
        self.dv['abc'] = agg_var
        self.dv['y'] = 20 + 100 * self.ds['y'].variable

        identity = lambda x: x
        for g in ['x', 'y', 'abc']:
            for shortcut in [False, True]:
                for squeeze in [False, True]:
                    expected = self.dv
                    grouped = self.dv.groupby(g, squeeze=squeeze)
                    actual = grouped.apply(identity, shortcut=shortcut)
                    self.assertDataArrayIdentical(expected, actual)

        grouped = self.dv.groupby('abc', squeeze=True)
        expected_sum_all = Dataset({
            'foo':
            Variable(['abc'],
                     np.array([
                         self.x[:, :9].sum(), self.x[:, 10:].sum(),
                         self.x[:, 9:10].sum()
                     ]).T),
            'abc':
            Variable(['abc'], np.array(['a', 'b', 'c']))
        })['foo']
        self.assertDataArrayAllClose(expected_sum_all, grouped.reduce(np.sum))
        self.assertDataArrayAllClose(expected_sum_all, grouped.sum())
        self.assertDataArrayAllClose(expected_sum_all, grouped.sum())
        expected_unique = Variable('abc', ['a', 'b', 'c'])
        self.assertVariableEqual(expected_unique, grouped.unique_coord)
        self.assertEqual(3, len(grouped))

        grouped = self.dv.groupby('abc', squeeze=False)
        self.assertDataArrayAllClose(expected_sum_all,
                                     grouped.sum(dimension=None))

        expected_sum_axis1 = Dataset({
            'foo': (['x', 'abc'],
                    np.array([
                        self.x[:, :9].sum(1), self.x[:, 10:].sum(1),
                        self.x[:, 9:10].sum(1)
                    ]).T),
            'x':
            self.ds.variables['x'],
            'abc':
            Variable(['abc'], np.array(['a', 'b', 'c']))
        })['foo']
        self.assertDataArrayAllClose(expected_sum_axis1,
                                     grouped.reduce(np.sum, 'y'))
        self.assertDataArrayAllClose(expected_sum_axis1, grouped.sum('y'))

        def center(x):
            return x - np.mean(x)

        expected_ds = self.dv.dataset.copy()
        exp_data = np.hstack([
            center(self.x[:, :9]),
            center(self.x[:, 9:10]),
            center(self.x[:, 10:])
        ])
        expected_ds['foo'] = (['x', 'y'], exp_data)
        expected_centered = expected_ds['foo']
        self.assertDataArrayAllClose(expected_centered, grouped.apply(center))

    def test_concat(self):
        self.ds['bar'] = Variable(['x', 'y'], np.random.randn(10, 20))
        foo = self.ds['foo'].select()
        bar = self.ds['bar'].rename('foo').select()
        # from dataset array:
        self.assertVariableEqual(
            Variable(['w', 'x', 'y'], np.array([foo.values, bar.values])),
            DataArray.concat([foo, bar], 'w'))
        # from iteration:
        grouped = [g for _, g in foo.groupby('x')]
        stacked = DataArray.concat(grouped, self.ds['x'])
        self.assertDataArrayIdentical(foo.select(), stacked)

    def test_align(self):
        self.ds['x'] = ('x', np.array(list('abcdefghij')))
        with self.assertRaises(ValueError):
            self.dv + self.dv[:5]
        dv1, dv2 = align(self.dv, self.dv[:5], join='inner')
        self.assertDataArrayIdentical(dv1, self.dv[:5])
        self.assertDataArrayIdentical(dv2, self.dv[:5])

    def test_to_and_from_series(self):
        expected = self.dv.to_dataframe()['foo']
        actual = self.dv.to_series()
        self.assertArrayEqual(expected.values, actual.values)
        self.assertArrayEqual(expected.index.values, actual.index.values)
        self.assertEqual('foo', actual.name)
        # test roundtrip
        self.assertDataArrayIdentical(self.dv, DataArray.from_series(actual))
        # test name is None
        actual.name = None
        expected_da = self.dv.rename(None)
        self.assertDataArrayIdentical(expected_da,
                                      DataArray.from_series(actual))
Ejemplo n.º 10
0
class TestDataArray(TestCase):
    def setUp(self):
        self.attrs = {'attr1': 'value1', 'attr2': 2929}
        self.x = np.random.random((10, 20))
        self.v = Variable(['x', 'y'], self.x)
        self.va = Variable(['x', 'y'], self.x, self.attrs)
        self.ds = Dataset({'foo': self.v})
        self.dv = self.ds['foo']

    def test_repr(self):
        v = Variable(['time', 'x'], [[1, 2, 3], [4, 5, 6]], {'foo': 'bar'})
        data_array = Dataset({'my_variable': v, 'other': ([], 0)}
                             )['my_variable']
        expected = dedent("""
        <xray.DataArray 'my_variable' (time: 2, x: 3)>
        array([[1, 2, 3],
               [4, 5, 6]])
        Coordinates:
            time: Int64Index([0, 1], dtype='int64')
            x: Int64Index([0, 1, 2], dtype='int64')
        Linked dataset variables:
            other
        Attributes:
            foo: bar
        """).strip()
        self.assertEqual(expected, repr(data_array))

    def test_properties(self):
        self.assertDatasetIdentical(self.dv.dataset, self.ds)
        self.assertEqual(self.dv.name, 'foo')
        self.assertVariableEqual(self.dv.variable, self.v)
        self.assertArrayEqual(self.dv.values, self.v.values)
        for attr in ['dimensions', 'dtype', 'shape', 'size', 'ndim', 'attrs']:
            self.assertEqual(getattr(self.dv, attr), getattr(self.v, attr))
        self.assertEqual(len(self.dv), len(self.v))
        self.assertVariableEqual(self.dv, self.v)
        self.assertEqual(list(self.dv.coordinates), list(self.ds.coordinates))
        for k, v in iteritems(self.dv.coordinates):
            self.assertArrayEqual(v, self.ds.coordinates[k])
        with self.assertRaises(AttributeError):
            self.dv.name = 'bar'
        with self.assertRaises(AttributeError):
            self.dv.dataset = self.ds
        self.assertIsInstance(self.ds['x'].as_index, pd.Index)
        with self.assertRaisesRegexp(ValueError, 'must be 1-dimensional'):
            self.ds['foo'].as_index

    def test_constructor(self):
        data = np.random.random((2, 3))

        actual = DataArray(data)
        expected = Dataset({None: (['dim_0', 'dim_1'], data)})[None]
        self.assertDataArrayIdentical(expected, actual)

        actual = DataArray(data, [['a', 'b'], [-1, -2, -3]])
        expected = Dataset({None: (['dim_0', 'dim_1'], data),
                            'dim_0': ('dim_0', ['a', 'b']),
                            'dim_1': ('dim_1', [-1, -2, -3])})[None]
        self.assertDataArrayIdentical(expected, actual)

        actual = DataArray(data, [pd.Index(['a', 'b'], name='x'),
                                  pd.Index([-1, -2, -3], name='y')])
        expected = Dataset({None: (['x', 'y'], data),
                            'x': ('x', ['a', 'b']),
                            'y': ('y', [-1, -2, -3])})[None]
        self.assertDataArrayIdentical(expected, actual)

        indexes = [['a', 'b'], [-1, -2, -3]]
        actual = DataArray(data, indexes, ['x', 'y'])
        self.assertDataArrayIdentical(expected, actual)

        indexes = [pd.Index(['a', 'b'], name='A'),
                   pd.Index([-1, -2, -3], name='B')]
        actual = DataArray(data, indexes, ['x', 'y'])
        self.assertDataArrayIdentical(expected, actual)

        indexes = {'x': ['a', 'b'], 'y': [-1, -2, -3]}
        actual = DataArray(data, indexes, ['x', 'y'])
        self.assertDataArrayIdentical(expected, actual)

        indexes = OrderedDict([('x', ['a', 'b']), ('y', [-1, -2, -3])])
        actual = DataArray(data, indexes)
        self.assertDataArrayIdentical(expected, actual)

        expected = Dataset({None: (['x', 'y'], data),
                            'x': ('x', ['a', 'b'])})[None]
        actual = DataArray(data, {'x': ['a', 'b']}, ['x', 'y'])
        self.assertDataArrayIdentical(expected, actual)

        with self.assertRaisesRegexp(ValueError, 'but data has ndim'):
            DataArray(data, [[0, 1, 2]], ['x', 'y'])

        with self.assertRaisesRegexp(ValueError, 'not array dimensions'):
            DataArray(data, {'x': [0, 1, 2]}, ['a', 'b'])

        with self.assertRaisesRegexp(ValueError, 'must have the same length'):
            DataArray(data, {'x': [0, 1, 2]})

        actual = DataArray(data, dimensions=['x', 'y'])
        expected = Dataset({None: (['x', 'y'], data)})[None]
        self.assertDataArrayIdentical(expected, actual)

        actual = DataArray(data, dimensions=['x', 'y'], name='foo')
        expected = Dataset({'foo': (['x', 'y'], data)})['foo']
        self.assertDataArrayIdentical(expected, actual)

        with self.assertRaisesRegexp(TypeError, 'is not a string'):
            DataArray(data, dimensions=['x', None])

        actual = DataArray(data, name='foo')
        expected = Dataset({'foo': (['dim_0', 'dim_1'], data)})['foo']
        self.assertDataArrayIdentical(expected, actual)

        actual = DataArray(data, dimensions=['x', 'y'], attributes={'bar': 2})
        expected = Dataset({None: (['x', 'y'], data, {'bar': 2})})[None]
        self.assertDataArrayIdentical(expected, actual)

        actual = DataArray(data, dimensions=['x', 'y'], encoding={'bar': 2})
        expected = Dataset({None: (['x', 'y'], data, {}, {'bar': 2})})[None]
        self.assertDataArrayIdentical(expected, actual)

    def test_constructor_from_self_described(self):
        data = [[-0.1, 21], [0, 2]]
        expected = DataArray(data,
                             indexes={'x': ['a', 'b'], 'y': [-1, -2]},
                             dimensions=['x', 'y'], name='foobar',
                             attributes={'bar': 2}, encoding={'foo': 3})
        actual = DataArray(expected)
        self.assertDataArrayIdentical(expected, actual)

        frame = pd.DataFrame(data, index=pd.Index(['a', 'b'], name='x'),
                             columns=pd.Index([-1, -2], name='y'))
        actual = DataArray(frame)
        self.assertDataArrayEqual(expected, actual)

        series = pd.Series(data[0], index=pd.Index([-1, -2], name='y'))
        actual = DataArray(series)
        self.assertDataArrayEqual(expected[0], actual)

        panel = pd.Panel({0: frame})
        actual = DataArray(panel)
        expected = DataArray([data], expected.coordinates, ['dim_0', 'x', 'y'])
        self.assertDataArrayIdentical(expected, actual)

        expected = DataArray(['a', 'b'], name='foo')
        actual = DataArray(pd.Index(['a', 'b'], name='foo'))
        self.assertDataArrayIdentical(expected, actual)

    def test_equals_and_identical(self):
        da2 = self.dv.copy()
        self.assertTrue(self.dv.equals(da2))
        self.assertTrue(self.dv.identical(da2))

        da3 = self.dv.rename('baz')
        self.assertTrue(self.dv.equals(da3))
        self.assertFalse(self.dv.identical(da3))

        da4 = self.dv.rename({'x': 'xxx'})
        self.assertFalse(self.dv.equals(da4))
        self.assertFalse(self.dv.identical(da4))

        da5 = self.dv.copy()
        da5.attrs['foo'] = 'bar'
        self.assertTrue(self.dv.equals(da5))
        self.assertFalse(self.dv.identical(da5))

        da6 = self.dv.copy()
        da6['x'] = ('x', -np.arange(10))
        self.assertFalse(self.dv.equals(da6))
        self.assertFalse(self.dv.identical(da6))

        da2[0, 0] = np.nan
        self.dv[0, 0] = np.nan
        self.assertTrue(self.dv.equals(da2))
        self.assertTrue(self.dv.identical(da2))

        da2[:] = np.nan
        self.assertFalse(self.dv.equals(da2))
        self.assertFalse(self.dv.identical(da2))

    def test_items(self):
        # strings pull out dataarrays
        self.assertDataArrayIdentical(self.dv, self.ds['foo'])
        x = self.dv['x']
        y = self.dv['y']
        self.assertDataArrayIdentical(self.ds['x'], x)
        self.assertDataArrayIdentical(self.ds['y'], y)
        # integer indexing
        I = ReturnItem()
        for i in [I[:], I[...], I[x.values], I[x.variable], I[x], I[x, y],
                  I[x.values > -1], I[x.variable > -1], I[x > -1],
                  I[x > -1, y > -1]]:
            self.assertVariableEqual(self.dv, self.dv[i])
        for i in [I[0], I[:, 0], I[:3, :2],
                  I[x.values[:3]], I[x.variable[:3]], I[x[:3]], I[x[:3], y[:4]],
                  I[x.values > 3], I[x.variable > 3], I[x > 3], I[x > 3, y > 3]]:
            self.assertVariableEqual(self.v[i], self.dv[i])
        # make sure we always keep the array around, even if it's a scalar
        self.assertVariableEqual(self.dv[0, 0], self.dv.variable[0, 0])
        for k in ['x', 'y', 'foo']:
            self.assertIn(k, self.dv[0, 0].dataset)

    def test_indexed(self):
        self.assertEqual(self.dv[0].dataset, self.ds.indexed(x=0))
        self.assertEqual(self.dv[:3, :5].dataset,
                         self.ds.indexed(x=slice(3), y=slice(5)))
        self.assertDataArrayIdentical(self.dv, self.dv.indexed(x=slice(None)))
        self.assertDataArrayIdentical(self.dv[:3], self.dv.indexed(x=slice(3)))

    def test_labeled(self):
        self.ds['x'] = ('x', np.array(list('abcdefghij')))
        da = self.ds['foo']
        self.assertDataArrayIdentical(da, da.labeled(x=slice(None)))
        self.assertDataArrayIdentical(da[1], da.labeled(x='b'))
        self.assertDataArrayIdentical(da[:3], da.labeled(x=slice('c')))

    def test_loc(self):
        self.ds['x'] = ('x', np.array(list('abcdefghij')))
        da = self.ds['foo']
        self.assertDataArrayIdentical(da[:3], da.loc[:'c'])
        self.assertDataArrayIdentical(da[1], da.loc['b'])
        self.assertDataArrayIdentical(da[:3], da.loc[['a', 'b', 'c']])
        self.assertDataArrayIdentical(da[:3, :4],
                                      da.loc[['a', 'b', 'c'], np.arange(4)])
        da.loc['a':'j'] = 0
        self.assertTrue(np.all(da.values == 0))

    def test_reindex(self):
        foo = self.dv
        bar = self.dv[:2, :2]
        self.assertDataArrayIdentical(foo.reindex_like(bar), bar)

        expected = foo.copy()
        expected[:] = np.nan
        expected[:2, :2] = bar
        self.assertDataArrayIdentical(bar.reindex_like(foo), expected)

    def test_rename(self):
        renamed = self.dv.rename('bar')
        self.assertEqual(renamed.dataset, self.ds.rename({'foo': 'bar'}))
        self.assertEqual(renamed.name, 'bar')

        renamed = self.dv.rename({'foo': 'bar'})
        self.assertEqual(renamed.dataset, self.ds.rename({'foo': 'bar'}))
        self.assertEqual(renamed.name, 'bar')

    def test_dataset_getitem(self):
        dv = self.ds['foo']
        self.assertDataArrayIdentical(dv, self.dv)

    def test_array_interface(self):
        self.assertArrayEqual(np.asarray(self.dv), self.x)
        # test patched in methods
        self.assertArrayEqual(self.dv.astype(float), self.v.astype(float))
        self.assertVariableEqual(self.dv.argsort(), self.v.argsort())
        self.assertVariableEqual(self.dv.clip(2, 3), self.v.clip(2, 3))
        # test ufuncs
        expected = deepcopy(self.ds)
        expected['foo'][:] = np.sin(self.x)
        self.assertDataArrayEqual(expected['foo'], np.sin(self.dv))
        self.assertDataArrayEqual(self.dv, np.maximum(self.v, self.dv))
        bar = Variable(['x', 'y'], np.zeros((10, 20)))
        self.assertDataArrayEqual(self.dv, np.maximum(self.dv, bar))

    def test_math(self):
        x = self.x
        v = self.v
        a = self.dv
        # variable math was already tested extensively, so let's just make sure
        # that all types are properly converted here
        self.assertDataArrayEqual(a, +a)
        self.assertDataArrayEqual(a, a + 0)
        self.assertDataArrayEqual(a, 0 + a)
        self.assertDataArrayEqual(a, a + 0 * v)
        self.assertDataArrayEqual(a, 0 * v + a)
        self.assertDataArrayEqual(a, a + 0 * x)
        self.assertDataArrayEqual(a, 0 * x + a)
        self.assertDataArrayEqual(a, a + 0 * a)
        self.assertDataArrayEqual(a, 0 * a + a)
        # test different indices
        ds2 = self.ds.update({'x': ('x', 3 + np.arange(10))}, inplace=False)
        b = ds2['foo']
        with self.assertRaisesRegexp(ValueError, 'not aligned'):
            a + b
        with self.assertRaisesRegexp(ValueError, 'not aligned'):
            b + a
        with self.assertRaisesRegexp(TypeError, 'datasets do not support'):
            a + a.dataset

    def test_dataset_math(self):
        # verify that mathematical operators keep around the expected variables
        # when doing math with dataset arrays from one or more aligned datasets
        obs = Dataset({'tmin': ('x', np.arange(5)),
                       'tmax': ('x', 10 + np.arange(5)),
                       'x': ('x', 0.5 * np.arange(5))})

        actual = 2 * obs['tmax']
        expected = Dataset({'tmax2': ('x', 2 * (10 + np.arange(5))),
                            'x': obs['x']})['tmax2']
        self.assertDataArrayEqual(actual, expected)

        actual = obs['tmax'] - obs['tmin']
        expected = Dataset({'trange': ('x', 10 * np.ones(5)),
                            'x': obs['x']})['trange']
        self.assertDataArrayEqual(actual, expected)

        sim = Dataset({'tmin': ('x', 1 + np.arange(5)),
                       'tmax': ('x', 11 + np.arange(5)),
                       'x': ('x', 0.5 * np.arange(5))})

        actual = sim['tmin'] - obs['tmin']
        expected = Dataset({'error': ('x', np.ones(5)),
                            'x': obs['x']})['error']
        self.assertDataArrayEqual(actual, expected)

        # in place math shouldn't remove or conflict with other variables
        actual = deepcopy(sim['tmin'])
        actual -= obs['tmin']
        expected = Dataset({'tmin': ('x', np.ones(5)),
                            'tmax': sim['tmax'],
                            'x': sim['x']})['tmin']
        self.assertDataArrayEqual(actual, expected)

    def test_math_name(self):
        # Verify that name is preserved only when it can be done unambiguously.
        # The rule (copied from pandas.Series) is keep the current name only if
        # the other object has no name attribute and this object isn't a
        # coordinate; otherwise reset to None.
        ds = self.ds
        a = self.dv
        self.assertEqual((+a).name, 'foo')
        self.assertEqual((a + 0).name, 'foo')
        self.assertIs((a + a.rename(None)).name, None)
        self.assertIs((a + a).name, None)
        self.assertIs((+ds['x']).name, None)
        self.assertIs((ds['x'] + 0).name, None)
        self.assertIs((a + ds['x']).name, None)

    def test_coord_math(self):
        ds = Dataset({'x': ('x', 1 + np.arange(3))})
        expected = ds.copy()
        expected['x2'] = ('x', np.arange(3))
        actual = ds['x'] - 1
        self.assertDataArrayEqual(expected['x2'], actual)

    def test_item_math(self):
        self.ds['x'] = ('x', np.array(list('abcdefghij')))
        self.assertVariableEqual(self.dv + self.dv[0, 0],
                               self.dv + self.dv[0, 0].values)
        new_data = self.x[0][None, :] + self.x[:, 0][:, None]
        self.assertVariableEqual(self.dv[:, 0] + self.dv[0],
                                 Variable(['x', 'y'], new_data))
        self.assertVariableEqual(self.dv[0] + self.dv[:, 0],
                                 Variable(['y', 'x'], new_data.T))

    def test_inplace_math(self):
        x = self.x
        v = self.v
        a = self.dv
        b = a
        b += 1
        self.assertIs(b, a)
        self.assertIs(b.variable, v)
        self.assertArrayEqual(b.values, x)
        self.assertIs(source_ndarray(b.values), x)
        self.assertDatasetIdentical(b.dataset, self.ds)

    def test_transpose(self):
        self.assertVariableEqual(self.dv.variable.transpose(),
                               self.dv.transpose())

    def test_squeeze(self):
        self.assertVariableEqual(self.dv.variable.squeeze(), self.dv.squeeze())

    def test_reduce(self):
        self.assertVariableEqual(self.dv.reduce(np.mean, 'x'),
                            self.v.reduce(np.mean, 'x'))
        # needs more...
        # should check which extra dimensions are dropped

    def test_reduce_keep_attrs(self):
        # Test dropped attrs
        vm = self.va.mean()
        self.assertEqual(len(vm.attrs), 0)
        self.assertEqual(vm.attrs, OrderedDict())

        # Test kept attrs
        vm = self.va.mean(keep_attrs=True)
        self.assertEqual(len(vm.attrs), len(self.attrs))
        self.assertEqual(vm.attrs, self.attrs)

    def test_unselect(self):
        with self.assertRaisesRegexp(ValueError, 'cannot unselect the name'):
            self.dv.unselect('foo')
        with self.assertRaisesRegexp(ValueError, 'must be a variable in'):
            self.dv.unselect('y')

    def test_groupby_iter(self):
        for ((act_x, act_dv), (exp_x, exp_ds)) in \
                zip(self.dv.groupby('y'), self.ds.groupby('y')):
            self.assertEqual(exp_x, act_x)
            self.assertDataArrayIdentical(exp_ds['foo'], act_dv)
        for ((_, exp_dv), act_dv) in zip(self.dv.groupby('x'), self.dv):
            self.assertDataArrayIdentical(exp_dv, act_dv)

    def test_groupby(self):
        agg_var = Variable(['y'], np.array(['a'] * 9 + ['c'] + ['b'] * 10))
        self.dv['abc'] = agg_var
        self.dv['y'] = 20 + 100 * self.ds['y'].variable

        identity = lambda x: x
        for g in ['x', 'y', 'abc']:
            for shortcut in [False, True]:
                for squeeze in [False, True]:
                    expected = self.dv
                    grouped = self.dv.groupby(g, squeeze=squeeze)
                    actual = grouped.apply(identity, shortcut=shortcut)
                    self.assertDataArrayIdentical(expected, actual)

        grouped = self.dv.groupby('abc', squeeze=True)
        expected_sum_all = Dataset(
            {'foo': Variable(['abc'], np.array([self.x[:, :9].sum(),
                                                self.x[:, 10:].sum(),
                                                self.x[:, 9:10].sum()]).T),
             'abc': Variable(['abc'], np.array(['a', 'b', 'c']))})['foo']
        self.assertDataArrayAllClose(
            expected_sum_all, grouped.reduce(np.sum))
        self.assertDataArrayAllClose(
            expected_sum_all, grouped.sum())
        self.assertDataArrayAllClose(
            expected_sum_all, grouped.sum())
        expected_unique = Variable('abc', ['a', 'b', 'c'])
        self.assertVariableEqual(expected_unique, grouped.unique_coord)
        self.assertEqual(3, len(grouped))

        grouped = self.dv.groupby('abc', squeeze=False)
        self.assertDataArrayAllClose(
            expected_sum_all, grouped.sum(dimension=None))

        expected_sum_axis1 = Dataset(
            {'foo': (['x', 'abc'], np.array([self.x[:, :9].sum(1),
                                             self.x[:, 10:].sum(1),
                                             self.x[:, 9:10].sum(1)]).T),
             'x': self.ds.variables['x'],
             'abc': Variable(['abc'], np.array(['a', 'b', 'c']))})['foo']
        self.assertDataArrayAllClose(expected_sum_axis1,
                                     grouped.reduce(np.sum, 'y'))
        self.assertDataArrayAllClose(expected_sum_axis1, grouped.sum('y'))

        def center(x):
            return x - np.mean(x)

        expected_ds = self.dv.dataset.copy()
        exp_data = np.hstack([center(self.x[:, :9]),
                              center(self.x[:, 9:10]),
                              center(self.x[:, 10:])])
        expected_ds['foo'] = (['x', 'y'], exp_data)
        expected_centered = expected_ds['foo']
        self.assertDataArrayAllClose(expected_centered,
                                     grouped.apply(center))

    def test_concat(self):
        self.ds['bar'] = Variable(['x', 'y'], np.random.randn(10, 20))
        foo = self.ds['foo'].select()
        bar = self.ds['bar'].rename('foo').select()
        # from dataset array:
        self.assertVariableEqual(Variable(['w', 'x', 'y'],
                                          np.array([foo.values, bar.values])),
                                 DataArray.concat([foo, bar], 'w'))
        # from iteration:
        grouped = [g for _, g in foo.groupby('x')]
        stacked = DataArray.concat(grouped, self.ds['x'])
        self.assertDataArrayIdentical(foo.select(), stacked)

    def test_align(self):
        self.ds['x'] = ('x', np.array(list('abcdefghij')))
        with self.assertRaises(ValueError):
            self.dv + self.dv[:5]
        dv1, dv2 = align(self.dv, self.dv[:5], join='inner')
        self.assertDataArrayIdentical(dv1, self.dv[:5])
        self.assertDataArrayIdentical(dv2, self.dv[:5])

    def test_to_and_from_series(self):
        expected = self.dv.to_dataframe()['foo']
        actual = self.dv.to_series()
        self.assertArrayEqual(expected.values, actual.values)
        self.assertArrayEqual(expected.index.values, actual.index.values)
        self.assertEqual('foo', actual.name)
        # test roundtrip
        self.assertDataArrayIdentical(self.dv, DataArray.from_series(actual))
        # test name is None
        actual.name = None
        expected_da = self.dv.rename(None)
        self.assertDataArrayIdentical(expected_da,
                                      DataArray.from_series(actual))