def test_dataset_2D_aggregate_partial_hm(self):
    """Aggregate a 2D Dataset over 'x' with np.mean and compare to the axis-0 mean."""
    values = np.random.rand(11, 11)
    columns = {'x': self.xs, 'y': self.y_ints, 'z': values}
    grid = Dataset(columns, kdims=['x', 'y'], vdims=['z'])
    aggregated = grid.aggregate(['x'], np.mean)
    reference = Dataset({'x': self.xs, 'z': np.mean(values, axis=0)},
                        kdims=['x'], vdims=['z'])
    self.assertEqual(aggregated, reference)
Beispiel #2
0
 def test_dimension_values_vdim(self):
     """Compare unflattened 'unknown' values against the flipped, transposed reference grid."""
     dataset = Dataset(self.cube, kdims=['longitude', 'latitude'])
     observed = dataset.dimension_values('unknown', flat=False)
     grid = np.array([[0, 4, 8],
                      [1, 5, 9],
                      [2, 6, 10],
                      [3, 7, 11]], dtype=np.int32)
     self.assertEqual(observed, np.flipud(grid.T))
 def test_dataset_groupby_drop_dims_with_vdim(self):
     """Group a 3D Dataset by 'y' with 'Val' as key dimension and check the last group."""
     values = np.random.rand(3, 20, 10)
     columns = {'x': range(10), 'y': range(20), 'z': range(3),
                'Val': values, 'Val2': values*2}
     ds = Dataset(columns, kdims=['x', 'y', 'z'], vdims=['Val', 'Val2'])
     datatypes = [self.datatype, 'dictionary', 'dataframe']
     with DatatypeContext(datatypes, (ds, Dataset)):
         partial = ds.to(Dataset, kdims=['Val'], vdims=['Val2'], groupby='y')
     observed = partial.last['Val']
     expected = values[:, -1, :].T.flatten()
     self.assertEqual(observed, expected)
 def test_dataset_groupby_drop_dims_dynamic_with_vdim(self):
     """Dynamic variant: group a dask-backed Dataset by 'y' and check the group at key 19."""
     values = da.from_array(np.random.rand(3, 20, 10), 3)
     columns = {'x': range(10), 'y': range(20), 'z': range(3),
                'Val': values, 'Val2': values*2}
     ds = Dataset(columns, kdims=['x', 'y', 'z'], vdims=['Val', 'Val2'])
     datatypes = [self.datatype, 'dictionary', 'dataframe']
     with DatatypeContext(datatypes, (ds, Dataset)):
         partial = ds.to(Dataset, kdims=['Val'], vdims=['Val2'],
                         groupby='y', dynamic=True)
         # The lookup stays inside the context, as in the original test.
         observed = partial[19]['Val']
         expected = values[:, -1, :].T.flatten().compute()
         self.assertEqual(observed, expected)
 def test_dataset_2D_aggregate_partial_hm_alias(self):
     """Aggregate over the aliased dimension 'X' and compare to the computed axis-0 mean."""
     values = da.from_array(np.random.rand(11, 11), 3)
     columns = {'x': self.xs, 'y': self.y_ints, 'z': values}
     dataset = Dataset(columns,
                       kdims=[('x', 'X'), ('y', 'Y')], vdims=[('z', 'Z')])
     aggregated = dataset.aggregate(['X'], np.mean)
     column_means = np.mean(values, axis=0).compute()
     reference = Dataset({'x': self.xs, 'z': column_means},
                         kdims=[('x', 'X')], vdims=[('z', 'Z')])
     self.assertEqual(aggregated, reference)
 def test_dataset_2D_reduce_hm_alias(self):
     """Reduce over both key dimensions, by name and by alias, and compare to np.mean."""
     values = np.random.rand(11, 11)
     columns = {'x': self.xs, 'y': self.y_ints, 'z': values}
     dataset = Dataset(columns,
                       kdims=[('x', 'X'), ('y', 'Y')], vdims=[('z', 'Z')])
     # First the dimension names, then their aliases.
     for dims in (['x', 'y'], ['X', 'Y']):
         reduced = np.array(dataset.reduce(dims, np.mean))
         self.assertEqual(reduced, np.mean(values))
 def test_irregular_grid_data_values_inverted_y(self):
     """Build a meshgrid Dataset with descending y and check values and coordinates."""
     nx, ny = 20, 5
     x_centers = np.arange(nx)+0.5
     y_centers = np.arange(ny)*-1+0.5
     xs, ys = np.meshgrid(x_centers, y_centers)
     zs = np.arange(100).reshape(5, 20)
     ds = Dataset((xs, ys, zs), ['x', 'y'], 'z')
     self.assertEqual(ds.dimension_values(2, flat=False), zs)
     for name, grid in (('x', xs), ('y', ys)):
         self.assertEqual(ds.interface.coords(ds, name), grid)
 def test_multi_dimension_groupby(self):
     """Group by both key dimensions and check the key order and the last group."""
     x = list('AB'*10)
     y = np.arange(20)%3
     z = np.arange(20)
     ds = Dataset((x, y, z), kdims=['x', 'y'], vdims=['z'],
                  datatype=[self.datatype])
     grouped = ds.groupby(['x', 'y'])
     expected_keys = [('A', 0), ('B', 1), ('A', 2),
                      ('B', 0), ('A', 1), ('B', 2)]
     self.assertEqual(grouped.keys(), expected_keys)
     expected_last = Dataset({'z': [5, 11, 17]}, vdims=['z'])
     self.assertEqual(grouped.last, expected_last)
Beispiel #9
0
 def test_select_dropped_dimensions_restoration(self):
     """Select chain=0 and check the 'chain' dimension is still present with length 1."""
     values = np.random.randn(3, 8)
     n_chain, n_value = values.shape
     darray = xr.DataArray(values, name='stuff', dims=['chain', 'value'],
                           coords=dict(chain=range(n_chain),
                                       value=range(n_value)))
     selected = Dataset(darray).select(chain=0)
     self.assertEqual(selected.data.dims, dict(chain=1, value=8))
     self.assertEqual(selected.data.stuff.shape, (1, 8))
 def test_dataset_groupby_dynamic(self):
     """Dynamically group by 'x' and compare the group at key 0 to the first column."""
     values = np.random.rand(11, 11)
     columns = {'x': self.xs, 'y': self.y_ints, 'z': values}
     dataset = Dataset(columns, kdims=['x', 'y'], vdims=['z'])
     datatypes = [self.datatype, 'dictionary', 'dataframe']
     with DatatypeContext(datatypes, dataset):
         grouped = dataset.groupby('x', dynamic=True)
     expected_first = Dataset({'y': self.y_ints, 'z': values[:, 0]},
                              kdims=['y'], vdims=['z'])
     self.assertEqual(grouped[0], expected_first)
 def test_dataset_groupby_dynamic_alias(self):
     """Dynamically group by the alias 'X' and compare the group at key 0."""
     values = da.from_array(np.random.rand(11, 11), 3)
     columns = {'x': self.xs, 'y': self.y_ints, 'z': values}
     dataset = Dataset(columns,
                       kdims=[('x', 'X'), ('y', 'Y')], vdims=[('z', 'Z')])
     datatypes = [self.datatype, 'dictionary', 'dataframe']
     with DatatypeContext(datatypes, dataset):
         grouped = dataset.groupby('X', dynamic=True)
     first_column = values[:, 0].compute()
     expected_first = Dataset({'y': self.y_ints, 'z': first_column},
                              kdims=[('y', 'Y')], vdims=[('z', 'Z')])
     self.assertEqual(grouped[0], expected_first)
 def test_dataset_groupby_multiple_dims(self):
     """Group a 4D Dataset by ['c', 'd'] and compare each group to the matching selection."""
     axes = tuple(range(8) for _ in range(4))
     values = da.from_array(np.random.rand(8, 8, 8, 8), 4)
     dataset = Dataset(axes + (values,),
                       kdims=['a', 'b', 'c', 'd'], vdims=['Value'])
     grouped = dataset.groupby(['c', 'd'])
     expected_keys = list(product(range(8), repeat=2))
     self.assertEqual(list(grouped.keys()), expected_keys)
     for c_val, d_val in expected_keys:
         selection = dataset.select(c=c_val, d=d_val).reindex(['a', 'b'])
         self.assertEqual(grouped[c_val, d_val], selection)
 def test_zero_sized_coordinates_range(self):
     """With an empty 'x' coordinate, range() of 'x' and of 'A' should both be NaN."""
     darray = xr.DataArray(np.empty((2, 0)), dims=('y', 'x'),
                           coords={'x': [], 'y': [0, 1]}, name='A')
     ds = Dataset(darray)
     for dimension in ('x', 'A'):
         lower, upper = ds.range(dimension)
         self.assertTrue(np.isnan(lower))
         self.assertTrue(np.isnan(upper))
 def test_xarray_dataset_with_scalar_dim_canonicalize(self):
     """Values from an xarray Dataset with a length-1 't' dimension should come back 2D."""
     zs = np.array([[[0, 1], [2, 3], [4, 5]]])
     coords = {'x': [0, 1], 'y': [0.1, 0.2, 0.3], 't': [1]}
     xrarr = xr.DataArray(zs, coords=coords, dims=['t', 'y', 'x'])
     ds = Dataset(xr.Dataset({'v': xrarr}),
                  kdims=['x', 'y'], vdims=['v'], datatype=['xarray'])
     canonical = ds.dimension_values(2, flat=False)
     self.assertEqual(canonical.ndim, 2)
     self.assertEqual(canonical, np.array([[0, 1], [2, 3], [4, 5]]))
 def test_xarray_dataset_names_and_units(self):
     """Check the Dimensions built from 'long_name' and 'units' attrs on a DataArray."""
     xs = [0.1, 0.2, 0.3]
     ys = [0, 1]
     zs = np.array([[0, 1], [2, 3], [4, 5]])
     darray = xr.DataArray(zs, coords=[('x_dim', xs), ('y_dim', ys)],
                           name="data_name", dims=['y_dim', 'x_dim'])
     darray.attrs['long_name'] = "data long name"
     darray.attrs['units'] = "array_unit"
     darray.x_dim.attrs['units'] = "x_unit"
     darray.y_dim.attrs['long_name'] = "y axis long name"
     dataset = Dataset(darray)

     x_dimension = dataset.get_dimension("x_dim")
     self.assertEqual(x_dimension, Dimension("x_dim", unit="x_unit"))

     y_dimension = dataset.get_dimension("y_dim")
     self.assertEqual(y_dimension,
                      Dimension("y_dim", label="y axis long name"))

     value_dimension = dataset.get_dimension("data_name")
     self.assertEqual(value_dimension,
                      Dimension("data_name", label="data long name",
                                unit="array_unit"))
 def test_construct_3d_from_xarray(self):
     """Build a 3D Dataset from a DataArray with 2D 'lat'/'lon' coordinates.

     Skipped when xarray cannot be imported. Checks the flattened values of
     'lon', 'lat' and 'A', and the unexpanded values of 'z'.
     """
     try:
         import xarray as xr
     except ImportError:
         # Only a missing xarray skips the test; a bare ``except`` would also
         # turn KeyboardInterrupt/SystemExit into a skip.
         raise SkipTest("Test requires xarray")
     zs = np.arange(48).reshape(2, 4, 6)
     darray = xr.DataArray(zs, dims=['z', 'y', 'x'],
                           coords={'lat': (('y', 'x'), self.ys),
                                   'lon': (('y', 'x'), self.xs),
                                   'z': [0, 1]}, name='A')
     dataset = Dataset(darray, ['lon', 'lat', 'z'], 'A')
     self.assertEqual(dataset.dimension_values('lon'), self.xs.T.flatten())
     self.assertEqual(dataset.dimension_values('lat'), self.ys.T.flatten())
     self.assertEqual(dataset.dimension_values('z', expanded=False), np.array([0, 1]))
     self.assertEqual(dataset.dimension_values('A'), zs.T.flatten())
 def test_3d_xarray_with_constant_dim_canonicalized_to_2d(self):
     """A length-1 'z' dimension left out of the kdims should be dropped from the values.

     Skipped when xarray cannot be imported.
     """
     try:
         import xarray as xr
     except ImportError:
         # Only a missing xarray skips the test; a bare ``except`` would also
         # turn KeyboardInterrupt/SystemExit into a skip.
         raise SkipTest("Test requires xarray")
     zs = np.arange(24).reshape(1, 4, 6)
     # Construct DataArray with additional constant dimension
     darray = xr.DataArray(zs, dims=['z', 'y', 'x'],
                           coords={'lat': (('y', 'x'), self.ys),
                                   'lon': (('y', 'x'), self.xs),
                                   'z': [0]}, name='A')
     # Declare Dataset without declaring constant dimension
     dataset = Dataset(darray, ['lon', 'lat'], 'A')
     # Ensure that canonicalization drops the constant dimension
     self.assertEqual(dataset.dimension_values('A', flat=False), zs[0])
    def test_construct_from_xarray(self):
        """Build a Dataset from a DataArray with 2D 'lat'/'lon' coordinates.

        Skipped when xarray cannot be imported. Checks the inferred key and
        value dimensions and the values returned for each of them.
        """
        try:
            import xarray as xr
        except ImportError:
            # Only a missing xarray skips the test; a bare ``except`` would
            # also turn KeyboardInterrupt/SystemExit into a skip.
            raise SkipTest("Test requires xarray")
        coords = OrderedDict([('lat', (('y', 'x'), self.ys)),
                              ('lon', (('y', 'x'), self.xs))])
        darray = xr.DataArray(self.zs, dims=['y', 'x'],
                              coords=coords, name='z')
        dataset = Dataset(darray)

        # Ensure that dimensions are inferred correctly
        self.assertEqual(dataset.kdims, [Dimension('lat'), Dimension('lon')])
        self.assertEqual(dataset.vdims, [Dimension('z')])

        # Ensure that canonicalization works on multi-dimensional coordinates
        self.assertEqual(dataset.dimension_values('lon', flat=False), self.xs)
        self.assertEqual(dataset.dimension_values('lat', flat=False), self.ys)
        self.assertEqual(dataset.dimension_values('z'), self.zs.T.flatten())
Beispiel #19
0
 def test_dataset_empty_combined_dimension(self):
     """An empty Dataset keyed by the tuple ('x', 'y') should equal one with separate keys."""
     kdims = ['x', 'y']
     combined = Dataset({('x', 'y'): []}, kdims=kdims)
     separate = Dataset({'x': [], 'y': []}, kdims=['x', 'y'])
     self.assertEqual(combined, separate)
Beispiel #20
0
 def test_graph_node_info_merge_on_index(self):
     """Node info with an 'index' kdim should supply the graph nodes' dimension 3."""
     indices = np.arange(8)
     labels = np.arange(1, 9)
     node_info = Dataset((indices, labels), 'index', 'label')
     edges = (self.source, self.target)
     graph = Graph((edges, node_info))
     merged = graph.nodes.dimension_values(3)
     self.assertEqual(merged, node_info.dimension_values(1))
Beispiel #21
0
 def test_initialize_cube_with_vdims(self):
     """With vdims=['Quantity'], dimensions('value', True) should equal ['Quantity']."""
     dataset = Dataset(self.cube, vdims=['Quantity'])
     value_labels = dataset.dimensions('value', True)
     self.assertEqual(value_labels, ['Quantity'])
 def test_select_multi_index(self):
     """Selecting longitude=0 and latitude=0 should give the value 5."""
     dataset = Dataset(self.cube)
     selection = dataset.select(longitude=0, latitude=0)
     self.assertEqual(selection, 5)
 def test_select_index(self):
     """Selecting longitude=0 should leave the array [[1, 5, 9]]."""
     selection = Dataset(self.cube).select(longitude=0)
     expected = np.array([[1, 5, 9]], dtype=np.int32)
     self.assertEqual(selection.data.data, expected)
 def test_getitem_scalar(self):
     """Indexing the cube Dataset with [0, 0] should give the value 5."""
     dataset = Dataset(self.cube)
     scalar = dataset[0, 0]
     self.assertEqual(scalar, 5)
 def test_select_multi_slice2(self):
     """Selecting with the set {0, 2} on both dimensions should leave [[5, 7]]."""
     dataset = Dataset(self.cube)
     selection = dataset.select(longitude={0, 2}, latitude={0, 2})
     expected = np.array([[5, 7]], dtype=np.int32)
     self.assertEqual(selection.data.data, expected)
 def test_multi_array_groupby_non_scalar(self):
     """Grouping a multitabular Dataset by 'x' should raise ValueError."""
     arrays = []
     for i in range(2):
         arrays.append(np.array([(1+i, i), (2+i, i), (3+i, i)]))
     mds = Dataset(arrays, kdims=['x', 'y'], datatype=['multitabular'])
     self.assertRaises(ValueError, mds.groupby, 'x')
 def test_multi_array_groupby(self):
     """Grouping by 'y' should give one group per input array, keyed by that array's y value."""
     arrays = []
     for i in range(2):
         arrays.append(np.array([(1+i, i), (2+i, i), (3+i, i)]))
     mds = Dataset(arrays, kdims=['x', 'y'], datatype=['multitabular'])
     groups = mds.groupby('y')
     for position, (key, group) in enumerate(groups.items()):
         source = arrays[position]
         self.assertEqual(key, source[0, 1])
         self.assertEqual(group, Dataset([source], kdims=['x']))
 def test_xarray_irregular_dataset_values(self):
     """Unexpanded 'z' values of the irregular fixture should be [0, 1, 2, 3]."""
     source = self.get_multi_dim_irregular_dataset()
     ds = Dataset(source)
     z_values = ds.dimension_values('z', expanded=False)
     self.assertEqual(z_values, np.array([0, 1, 2, 3]))
Beispiel #29
0
 def test_dataset_simple_dict_sorted(self):
     """A Dataset from an unordered dict should equal one built from key-sorted tuples."""
     from_dict = Dataset({2: 2, 1: 1, 3: 3}, kdims=['x'], vdims=['y'])
     sorted_rows = [(value, value) for value in range(1, 4)]
     from_rows = Dataset(sorted_rows, kdims=['x'], vdims=['y'])
     self.assertEqual(from_dict, from_rows)
 def test_xarray_dataset_irregular_shape(self):
     """The gridded shape of the irregular fixture should be (nan, nan, 3, 4)."""
     source = self.get_multi_dim_irregular_dataset()
     ds = Dataset(source)
     gridded_shape = ds.interface.shape(ds, gridded=True)
     self.assertEqual(gridded_shape, (np.nan, np.nan, 3, 4))
 def test_irregular_and_regular_coordinate_explicit_irregular_coords_inverted(
         self):
     """Declare 'xc'/'yc' as kdims and check the kdims and the unflattened values."""
     data = self.get_irregular_dataarray(False)
     ds = Dataset(data, ['xc', 'yc'], vdims='Value')
     expected_kdims = [Dimension('xc'), Dimension('yc')]
     self.assertEqual(ds.kdims, expected_kdims)
     values = ds.dimension_values(2, flat=False)
     self.assertEqual(values, data.values[0])
Beispiel #32
0
 def test_graph_node_info_merge_on_index_partial(self):
     """Node info covering only indices 0-4 should leave NaN in the remaining entries.

     The expected array holds the five supplied labels followed by three NaN
     values, compared against dimension 3 of the graph nodes.
     """
     node_info = Dataset((np.arange(5), np.arange(1, 6)), 'index', 'label')
     graph = Graph(((self.source, self.target), node_info))
     # ``np.nan`` rather than the ``np.NaN`` alias, which NumPy 2.0 removed.
     expected = np.array([1., 2., 3., 4., 5., np.nan, np.nan, np.nan])
     self.assertEqual(graph.nodes.dimension_values(3), expected)
 def test_multi_dict_groupby_non_scalar(self):
     """Grouping a multitabular Dataset of dicts by 'x' should raise ValueError."""
     arrays = []
     for i in range(2):
         arrays.append({'x': np.arange(i, i+2), 'y': i})
     mds = Dataset(arrays, kdims=['x', 'y'], datatype=['multitabular'])
     self.assertRaises(ValueError, mds.groupby, 'x')
Beispiel #34
0
 def test_range_vdim(self):
     """range('unknown') on the cube Dataset should equal (0, 11)."""
     dataset = Dataset(self.cube, kdims=['longitude', 'latitude'])
     value_range = dataset.range('unknown')
     self.assertEqual(value_range, (0, 11))
 def test_multi_dict_groupby(self):
     """Grouping by 'y' should give one group per input dict, keyed by that dict's 'y'."""
     arrays = []
     for i in range(2):
         arrays.append({'x': np.arange(i, i+2), 'y': i})
     mds = Dataset(arrays, kdims=['x', 'y'], datatype=['multitabular'])
     groups = mds.groupby('y')
     for position, (key, group) in enumerate(groups.items()):
         source = arrays[position]
         self.assertEqual(key, source['y'])
         self.assertEqual(group, Dataset([source], kdims=['x']))
Beispiel #36
0
 def test_select_set(self):
     """Selecting longitude with the set {0, 1} should leave a 3x2 array."""
     selection = Dataset(self.cube).select(longitude={0, 1})
     expected = np.array([[1, 2], [5, 6], [9, 10]], dtype=np.int32)
     self.assertEqual(selection.data.data, expected)
 def test_getitem_index(self):
     """Indexing the cube Dataset with [0] should leave the array [[1, 5, 9]]."""
     dataset = Dataset(self.cube)
     indexed = dataset[0]
     expected = np.array([[1, 5, 9]], dtype=np.int32)
     self.assertEqual(indexed.data.data, expected)
Beispiel #38
0
 def test_graph_node_info_no_index(self):
     """Node info without an index kdim should still supply the nodes' dimension 3."""
     node_info = Dataset(np.arange(8), vdims=['Label'])
     edges = (self.source, self.target)
     graph = Graph((edges, node_info))
     merged = graph.nodes.dimension_values(3)
     self.assertEqual(merged, node_info.dimension_values(0))
 def test_select_multi_slice1(self):
     """Selecting the range (0, 1.01) on both dimensions should leave a 2x2 array."""
     dataset = Dataset(self.cube)
     selection = dataset.select(longitude=(0, 1.01), latitude=(0, 1.01))
     expected = np.array([[5, 6], [9, 10]], dtype=np.int32)
     self.assertEqual(selection.data.data, expected)
 def test_range_vdim(self):
     """range('unknown') on the cube Dataset should equal (0, 11)."""
     dataset = Dataset(self.cube, kdims=['longitude', 'latitude'])
     lower_upper = dataset.range('unknown')
     self.assertEqual(lower_upper, (0, 11))
 def test_select_set(self):
     """Selecting longitude with the set {0, 1} should leave a 3x2 array."""
     dataset = Dataset(self.cube)
     selection = dataset.select(longitude={0, 1})
     expected = np.array([[1, 2], [5, 6], [9, 10]], dtype=np.int32)
     self.assertEqual(selection.data.data, expected)
Beispiel #42
0
 def test_initialize_cube(self):
     """Check the dimension labels of a Dataset built from the cube with no declared dims."""
     dataset = Dataset(self.cube)
     labels = dataset.dimensions(label=True)
     self.assertEqual(labels, ['longitude', 'latitude', 'unknown'])
Beispiel #43
0
 def test_select_multi_slice1(self):
     """Selecting the range (0, 1+epsilon) on both dimensions should leave a 2x2 array."""
     dataset = Dataset(self.cube)
     selection = dataset.select(longitude=(0, 1+self.epsilon),
                                latitude=(0, 1+self.epsilon))
     expected = np.array([[5, 6], [9, 10]], dtype=np.int32)
     self.assertEqual(selection.data.data, expected)
Beispiel #44
0
 def test_graph_node_info_no_index_mismatch(self):
     """Building a Graph with six index-less node rows should raise ValueError."""
     node_info = Dataset(np.arange(6), vdims=['Label'])
     edges = (self.source, self.target)
     self.assertRaises(ValueError, Graph, (edges, node_info))
Beispiel #45
0
class TestDimTransforms(ComparisonTestCase):
    """Tests for ``dim`` expressions applied to Datasets.

    ``setUp`` builds a pandas-backed Dataset and, when the module-level
    ``dd``, ``xr`` and ``da`` names are not None, dask- and xarray-backed
    counterparts. The ``assert_apply*`` helpers run one expression against
    those fixtures under the ``keep_index`` / ``compute`` combinations.
    """

    def setUp(self):
        """Create the pandas, dask and xarray Dataset fixtures used by the tests."""
        self.linear_ints = pd.Series(np.arange(1, 11))
        self.linear_floats = pd.Series(np.arange(1, 11) / 10.)
        self.negative = pd.Series(-self.linear_floats)
        self.repeating = pd.Series(
            ['A', 'B', 'C', 'A', 'B', 'C', 'A', 'B', 'C', 'A'])
        self.booleans = self.repeating == 'A'
        self.dataset = Dataset(
            (self.linear_ints, self.linear_floats, self.negative,
             self.repeating, self.booleans),
            ['int', 'float', 'negative', 'categories', 'booleans'])

        # Dask-DataFrame-backed copy of the same data, split into 2 partitions
        if dd is not None:
            ddf = dd.from_pandas(self.dataset.data, npartitions=2)
            self.dataset_dask = self.dataset.clone(data=ddf)

        # The remaining fixtures need xarray
        if xr is None:
            return

        x = np.arange(2, 62, 3)
        y = np.arange(2, 12, 2)
        array = np.arange(100).reshape(5, 20)
        darray = xr.DataArray(data=array,
                              coords=OrderedDict([('x', x), ('y', y)]),
                              dims=['y', 'x'])
        self.dataset_xarray = Dataset(darray, vdims=['z'])
        # Same DataArray layout, but with a dask array as the data
        if da is not None:
            dask_array = da.from_array(array)
            dask_da = xr.DataArray(data=dask_array,
                                   coords=OrderedDict([('x', x), ('y', y)]),
                                   dims=['y', 'x'])
            self.dataset_xarray_dask = Dataset(dask_da, vdims=['z'])

    # Assertion helpers

    def assert_apply(self,
                     expr,
                     expected,
                     skip_dask=False,
                     skip_no_index=False):
        """Apply ``expr`` to the pandas and dask Datasets and compare to ``expected``.

        Args:
            expr: Expression object whose ``apply`` method is called.
            expected: Either a scalar or a ``pd.Series``.
            skip_dask: If True, skip the dask checks for Series results
                (scalar results ignore this flag).
            skip_no_index: If True, skip the checks that call ``apply``
                without ``keep_index=True``.
        """
        if np.isscalar(expected):
            # Pandas input
            self.assertEqual(expr.apply(self.dataset, keep_index=False),
                             expected)
            self.assertEqual(expr.apply(self.dataset, keep_index=True),
                             expected)

            if dd is None:
                return

            # Dask input
            self.assertEqual(expr.apply(self.dataset_dask, keep_index=False),
                             expected)
            self.assertEqual(expr.apply(self.dataset_dask, keep_index=True),
                             expected)
            return

        # Make sure expected is a pandas Series
        self.assertIsInstance(expected, pd.Series)

        # Check using dataset backed by pandas DataFrame
        # keep_index=False
        if not skip_no_index:
            np.testing.assert_equal(expr.apply(self.dataset), expected.values)
        # keep_index=True
        pd.testing.assert_series_equal(expr.apply(self.dataset,
                                                  keep_index=True),
                                       expected,
                                       check_names=False)

        if skip_dask or dd is None:
            return

        # Check using dataset backed by Dask DataFrame
        expected_dask = dd.from_pandas(expected, npartitions=2)

        # keep_index=False, compute=False
        if not skip_no_index:
            da.assert_eq(
                expr.apply(self.dataset_dask, compute=False).compute(),
                expected_dask.values.compute())
        # keep_index=True, compute=False
        dd.assert_eq(expr.apply(self.dataset_dask,
                                keep_index=True,
                                compute=False),
                     expected_dask,
                     check_names=False)
        # keep_index=False, compute=True
        if not skip_no_index:
            np.testing.assert_equal(
                expr.apply(self.dataset_dask, compute=True),
                expected_dask.values.compute())
        # keep_index=True, compute=True
        pd.testing.assert_series_equal(expr.apply(self.dataset_dask,
                                                  keep_index=True,
                                                  compute=True),
                                       expected_dask.compute(),
                                       check_names=False)

    def assert_apply_xarray(self,
                            expr,
                            expected,
                            skip_dask=False,
                            skip_no_index=False):
        """Apply ``expr`` to the xarray-backed Datasets and compare to ``expected``.

        Args:
            expr: Expression object whose ``apply`` method is called.
            expected: Either a scalar or an ``xr.DataArray``.
            skip_dask: If True, skip the checks against the dask-backed
                xarray Dataset (scalar results never run them).
            skip_no_index: If True, skip the checks that call ``apply``
                without ``keep_index=True``.
        """
        import xarray as xr
        if np.isscalar(expected):
            # xarray input
            self.assertEqual(expr.apply(self.dataset_xarray, keep_index=False),
                             expected)
            self.assertEqual(expr.apply(self.dataset_xarray, keep_index=True),
                             expected)
            return

        # Make sure expected is an xarray DataArray
        self.assertIsInstance(expected, xr.DataArray)

        # Check using dataset backed by a NumPy-backed xarray DataArray
        # keep_index=False
        if not skip_no_index:
            np.testing.assert_equal(expr.apply(self.dataset_xarray),
                                    expected.values)
        # keep_index=True
        xr.testing.assert_equal(
            expr.apply(self.dataset_xarray, keep_index=True), expected)

        if skip_dask or da is None:
            return

        # Check using dataset backed by a dask-array-backed xarray DataArray
        expected_da = da.from_array(expected.values)
        expected_dask = expected.copy()
        expected_dask.data = expected_da

        # keep_index=False, compute=False
        if not skip_no_index:
            da.assert_eq(expr.apply(self.dataset_xarray_dask, compute=False),
                         expected_dask.data)
        # keep_index=True, compute=False
        xr.testing.assert_equal(
            expr.apply(self.dataset_xarray_dask,
                       keep_index=True,
                       compute=False),
            expected_dask,
        )
        # keep_index=False, compute=True
        if not skip_no_index:
            np.testing.assert_equal(
                expr.apply(self.dataset_xarray_dask, compute=True),
                expected_dask.data.compute())
        # keep_index=True, compute=True
        xr.testing.assert_equal(
            expr.apply(self.dataset_xarray_dask, keep_index=True,
                       compute=True),
            expected_dask.compute(),
        )

    # Unary operators

    def test_abs_transform(self):
        expr = abs(dim('negative'))
        self.assert_apply(expr, self.linear_floats)

    def test_neg_transform(self):
        expr = -dim('negative')
        self.assert_apply(expr, self.linear_floats)

    def test_inv_transform(self):
        expr = ~dim('booleans')
        self.assert_apply(expr, ~self.booleans)

    # Binary operators

    def test_add_transform(self):
        expr = dim('float') + 1
        self.assert_apply(expr, self.linear_floats + 1)

    def test_div_transform(self):
        expr = dim('int') / 10.
        self.assert_apply(expr, self.linear_floats)

    def test_floor_div_transform(self):
        expr = dim('int') // 2
        self.assert_apply(expr, self.linear_ints // 2)

    def test_mod_transform(self):
        expr = dim('int') % 2
        self.assert_apply(expr, self.linear_ints % 2)

    def test_mul_transform(self):
        expr = dim('float') * 10.
        self.assert_apply(expr, self.linear_ints.astype('float64'))

    def test_pow_transform(self):
        expr = dim('int')**2
        self.assert_apply(expr, self.linear_ints**2)

    def test_sub_transform(self):
        expr = dim('int') - 10
        self.assert_apply(expr, self.linear_ints - 10)

    # Reverse binary operators

    def test_radd_transform(self):
        expr = 1 + dim('float')
        self.assert_apply(expr, 1 + self.linear_floats)

    def test_rdiv_transform(self):
        expr = 10. / dim('int')
        self.assert_apply(expr, 10. / self.linear_ints)

    def test_rfloor_div_transform(self):
        expr = 2 // dim('int')
        self.assert_apply(expr, 2 // self.linear_ints)

    def test_rmod_transform(self):
        expr = 2 % dim('int')
        self.assert_apply(expr, 2 % self.linear_ints)

    def test_rmul_transform(self):
        expr = 10. * dim('float')
        self.assert_apply(expr, self.linear_ints.astype('float64'))

    def test_rsub_transform(self):
        expr = 10 - dim('int')
        self.assert_apply(expr, 10 - self.linear_ints)

    # NumPy operations

    def test_ufunc_transform(self):
        expr = np.sin(dim('float'))
        self.assert_apply(expr, np.sin(self.linear_floats))

    def test_astype_transform(self):
        expr = dim('int').astype('float64')
        self.assert_apply(expr, self.linear_ints.astype('float64'))

    def test_cumsum_transform(self):
        expr = dim('float').cumsum()
        self.assert_apply(expr, self.linear_floats.cumsum())

    def test_max_transform(self):
        expr = dim('float').max()
        self.assert_apply(expr, self.linear_floats.max())

    def test_min_transform(self):
        expr = dim('float').min()
        self.assert_apply(expr, self.linear_floats.min())

    def test_round_transform(self):
        expr = dim('float').round()
        self.assert_apply(expr, self.linear_floats.round())

    def test_sum_transform(self):
        expr = dim('float').sum()
        self.assert_apply(expr, self.linear_floats.sum())

    def test_std_transform(self):
        expr = dim('float').std(ddof=0)
        self.assert_apply(expr, self.linear_floats.std(ddof=0))

    def test_var_transform(self):
        expr = dim('float').var(ddof=0)
        self.assert_apply(expr, self.linear_floats.var(ddof=0))

    def test_log_transform(self):
        expr = dim('float').log()
        self.assert_apply(expr, np.log(self.linear_floats))

    def test_log10_transform(self):
        expr = dim('float').log10()
        self.assert_apply(expr, np.log10(self.linear_floats))

    # Custom functions

    def test_str_astype(self):
        expr = dim('int').str()
        self.assert_apply(expr, self.linear_ints.astype(str), skip_dask=True)

    def test_norm_transform(self):
        expr = dim('int').norm()
        self.assert_apply(expr, (self.linear_ints - 1) / 9.)

    def test_iloc_transform_int(self):
        expr = dim('int').iloc[1]
        self.assert_apply(expr, self.linear_ints[1])

    def test_iloc_transform_slice(self):
        expr = dim('int').iloc[1:3]
        self.assert_apply(expr, self.linear_ints[1:3], skip_dask=True)

    def test_iloc_transform_list(self):
        expr = dim('int').iloc[[1, 3, 5]]
        self.assert_apply(expr, self.linear_ints[[1, 3, 5]], skip_dask=True)

    def test_bin_transform(self):
        expr = dim('int').bin([0, 5, 10])
        expected = pd.Series(
            [2.5, 2.5, 2.5, 2.5, 2.5, 7.5, 7.5, 7.5, 7.5, 7.5])
        self.assert_apply(expr, expected)

    def test_bin_transform_with_labels(self):
        expr = dim('int').bin([0, 5, 10], ['A', 'B'])
        expected = pd.Series(
            ['A', 'A', 'A', 'A', 'A', 'B', 'B', 'B', 'B', 'B'])
        self.assert_apply(expr, expected)

    def test_categorize_transform_list(self):
        expr = dim('categories').categorize(['circle', 'square', 'triangle'])
        expected = pd.Series((['circle', 'square', 'triangle'] * 3) +
                             ['circle'])
        # We skip dask because results will depend on partition structure
        self.assert_apply(expr, expected, skip_dask=True)

    def test_categorize_transform_dict(self):
        expr = dim('categories').categorize({
            'A': 'circle',
            'B': 'square',
            'C': 'triangle'
        })
        expected = pd.Series((['circle', 'square', 'triangle'] * 3) +
                             ['circle'])
        # We don't skip dask because results are now stable across partitions
        self.assert_apply(expr, expected)

    def test_categorize_transform_dict_with_default(self):
        expr = dim('categories').categorize({
            'A': 'circle',
            'B': 'square'
        },
                                            default='triangle')
        expected = pd.Series((['circle', 'square', 'triangle'] * 3) +
                             ['circle'])
        # We don't skip dask because results are stable across partitions
        self.assert_apply(expr, expected)

    # Numpy functions

    def test_digitize(self):
        expr = dim('int').digitize([1, 5, 10])
        expected = pd.Series(np.array([1, 1, 1, 1, 2, 2, 2, 2, 2,
                                       3])).astype('int64')
        self.assert_apply(expr, expected)

    def test_isin(self):
        expr = dim('int').digitize([1, 5, 10]).isin([1, 3])
        expected = pd.Series(
            np.array([1, 1, 1, 1, 0, 0, 0, 0, 0, 1], dtype='bool'))
        self.assert_apply(expr, expected)

    # Complex expressions

    def test_multi_operator_expression(self):
        expr = (((dim('float') - 2) * 3)**2)
        self.assert_apply(expr, ((self.linear_floats - 2) * 3)**2)

    def test_multi_dim_expression(self):
        expr = dim('int') - dim('float')
        self.assert_apply(expr, self.linear_ints - self.linear_floats)

    # Repr method

    def test_dim_repr(self):
        self.assertEqual(repr(dim('float')), "dim('float')")

    def test_unary_op_repr(self):
        self.assertEqual(repr(-dim('float')), "-dim('float')")

    def test_binary_op_repr(self):
        self.assertEqual(repr(dim('float') * 2), "dim('float')*2")

    def test_reverse_binary_op_repr(self):
        self.assertEqual(repr(1 + dim('float')), "1+dim('float')")

    def test_ufunc_expression_repr(self):
        self.assertEqual(repr(np.log(dim('float'))), "dim('float').log()")

    def test_custom_func_repr(self):
        self.assertEqual(repr(dim('float').norm()), "dim('float').norm()")

    def test_multi_operator_expression_repr(self):
        self.assertEqual(repr(((dim('float') - 2) * 3)**2),
                         "((dim('float')-2)*3)**2")

    # Applies method

    def test_multi_dim_expression_applies(self):
        self.assertEqual((dim('int') - dim('float')).applies(self.dataset),
                         True)

    def test_multi_dim_expression_not_applies(self):
        self.assertEqual((dim('foo') - dim('bar')).applies(self.dataset),
                         False)

    def test_multi_dim_expression_partial_applies(self):
        self.assertEqual((dim('int') - dim('bar')).applies(self.dataset),
                         False)

    # Check namespaced expressions

    def test_pandas_namespace_accessor_repr(self):
        # NOTE(review): the expression uses the ``.df`` accessor while the
        # expected repr spells it ``.pd`` -- confirm this is intentional.
        self.assertEqual(repr(dim('date').df.dt.year),
                         "dim('date').pd.dt.year")

    def test_pandas_str_accessor(self):
        expr = dim('categories').df.str.lower()
        self.assert_apply(expr, self.repeating.str.lower())

    def test_pandas_chained_methods(self):
        expr = dim('int').df.rolling(1).mean()
        self.assert_apply(expr, self.linear_ints.rolling(1).mean())

    @xr_skip
    def test_xarray_namespace_method_repr(self):
        self.assertEqual(repr(dim('date').xr.quantile(0.95)),
                         "dim('date').xr.quantile(0.95)")

    @xr_skip
    def test_xarray_quantile_method(self):
        expr = dim('z').xr.quantile(0.95)
        self.assert_apply_xarray(expr,
                                 self.dataset_xarray.data.z.quantile(0.95),
                                 skip_dask=True)

    @xr_skip
    def test_xarray_roll_method(self):
        expr = dim('z').xr.roll({'x': 1}, roll_coords=False)
        self.assert_apply_xarray(
            expr, self.dataset_xarray.data.z.roll({'x': 1}, roll_coords=False))

    @xr_skip
    def test_xarray_coarsen_method(self):
        expr = dim('z').xr.coarsen({'x': 4}).mean()
        self.assert_apply_xarray(
            expr,
            self.dataset_xarray.data.z.coarsen({
                'x': 4
            }).mean())

    # Dynamic arguments

    def test_dynamic_mul(self):
        p = Params(a=1)
        expr = dim('float') * p.param.a
        self.assertEqual(list(expr.params.values()), [p.param.a])
        self.assert_apply(expr, self.linear_floats)
        # Changing the parameter should change the result of the same expr
        p.a = 2
        self.assert_apply(expr, self.linear_floats * 2)

    def test_dynamic_arg(self):
        p = Params(a=1)
        expr = dim('float').round(p.param.a)
        self.assertEqual(list(expr.params.values()), [p.param.a])
        self.assert_apply(expr, np.round(self.linear_floats, 1))
        p.a = 2
        self.assert_apply(expr, np.round(self.linear_floats, 2))

    def test_dynamic_kwarg(self):
        p = Params(a=1)
        expr = dim('float').round(decimals=p.param.a)
        self.assertEqual(list(expr.params.values()), [p.param.a])
        self.assert_apply(expr, np.round(self.linear_floats, 1))
        p.a = 2
        self.assert_apply(expr, np.round(self.linear_floats, 2))

    def test_pickle(self):
        # Round-trip an expression built in this test (not external data)
        expr = (((dim('float') - 2) * 3)**2)
        expr2 = pickle.loads(pickle.dumps(expr))
        self.assertEqual(expr, expr2)
Beispiel #46
0
 def test_dimension_values_kdim(self):
     """Unexpanded 'longitude' values should be [-1, 0, 1, 2] as int32."""
     dataset = Dataset(self.cube, kdims=['longitude', 'latitude'])
     longitudes = dataset.dimension_values('longitude', expanded=False)
     expected = np.array([-1, 0, 1, 2], dtype=np.int32)
     self.assertEqual(longitudes, expected)
Beispiel #47
0
 def test_dataset_range_categorical_dimension_empty(self):
     """The range of a string column on a zero-row dask Dataset should be (NaN, NaN)."""
     frame = pd.DataFrame({'a': ['1', '2', '3']})
     ddf = dd.from_pandas(frame, 1)
     empty = Dataset(ddf).iloc[:0]
     bounds = empty.range(0)
     for index in (0, 1):
         self.assertTrue(np.isnan(bounds[index]))
Beispiel #48
0
 def test_dimension_values_vdim(self):
     """Compare unflattened 'unknown' values against the transposed reference grid."""
     dataset = Dataset(self.cube, kdims=['longitude', 'latitude'])
     observed = dataset.dimension_values('unknown', flat=False)
     grid = np.array([[0, 4, 8], [1, 5, 9], [2, 6, 10], [3, 7, 11]],
                     dtype=np.int32)
     self.assertEqual(observed, grid.T)
Beispiel #49
0
 def test_initialize_cube_with_kdims(self):
     """dimensions('key', True) should equal the kdims declared at construction."""
     dataset = Dataset(self.cube, kdims=['longitude', 'latitude'])
     key_labels = dataset.dimensions('key', True)
     self.assertEqual(key_labels, ['longitude', 'latitude'])
Beispiel #50
0
 def test_range_kdim(self):
     """range('longitude') on the cube Dataset should equal (-1, 2)."""
     dataset = Dataset(self.cube, kdims=['longitude', 'latitude'])
     longitude_range = dataset.range('longitude')
     self.assertEqual(longitude_range, (-1, 2))
Beispiel #51
0
 def test_dimension_values_kdim(self):
     """Unexpanded 'longitude' values should be [-1, 0, 1, 2] as int32."""
     dataset = Dataset(self.cube, kdims=['longitude', 'latitude'])
     expected = np.array([-1, 0, 1, 2], dtype=np.int32)
     observed = dataset.dimension_values('longitude', expanded=False)
     self.assertEqual(observed, expected)
Beispiel #52
0
 def test_dataset_empty_list_init_dtypes(self):
     """Both columns of a Dataset built from an empty list should be float64."""
     empty = Dataset([], kdims=['x'], vdims=['y'])
     for name in ('x', 'y'):
         column = empty.dimension_values(name)
         self.assertEqual(column.dtype, np.float64)
Beispiel #53
0
 def test_range_kdim(self):
     """range('longitude') on the cube Dataset should equal (-1, 2)."""
     dataset = Dataset(self.cube, kdims=['longitude', 'latitude'])
     lower_upper = dataset.range('longitude')
     self.assertEqual(lower_upper, (-1, 2))
 def test_dataset_mixed_type_range(self):
     """The range of a column mixing strings and None should be (NaN, NaN)."""
     column = ['A', 'B', 'C', None]
     ds = Dataset((column, ), 'A')
     lower, upper = ds.range(0)
     for bound in (lower, upper):
         self.assertTrue(np.isnan(bound))
Beispiel #55
0
 def test_select_index(self):
     """Selecting longitude=0 should leave the array [[1, 5, 9]]."""
     dataset = Dataset(self.cube)
     selection = dataset.select(longitude=0)
     expected = np.array([[1, 5, 9]], dtype=np.int32)
     self.assertEqual(selection.data.data, expected)
Beispiel #56
0
 def test_initialize_cube(self):
     """Check the dimension labels of a Dataset built from the cube with no declared dims."""
     dataset = Dataset(self.cube)
     expected_labels = ['longitude', 'latitude', 'unknown']
     self.assertEqual(dataset.dimensions(label=True), expected_labels)
Beispiel #57
0
 def test_select_multi_index(self):
     """Selecting longitude=0 and latitude=0 should give the value 5."""
     dataset = Dataset(self.cube)
     picked = dataset.select(longitude=0, latitude=0)
     self.assertEqual(picked, 5)
Beispiel #58
0
 def test_initialize_cube_with_kdims(self):
     """dimensions('key', True) should equal the kdims declared at construction."""
     dataset = Dataset(self.cube, kdims=['longitude', 'latitude'])
     expected_labels = ['longitude', 'latitude']
     self.assertEqual(dataset.dimensions('key', True), expected_labels)
Beispiel #59
0
 def test_select_multi_slice2(self):
     """Selecting with the set {0, 2} on both dimensions should leave [[5, 7]]."""
     selection = Dataset(self.cube).select(longitude={0, 2},
                                           latitude={0, 2})
     expected = np.array([[5, 7]], dtype=np.int32)
     self.assertEqual(selection.data.data, expected)
Beispiel #60
0
 def test_initialize_cube_with_vdims(self):
     """With vdims=['Quantity'], dimensions('value', True) should equal ['Quantity']."""
     dataset = Dataset(self.cube, vdims=['Quantity'])
     labels = dataset.dimensions('value', True)
     self.assertEqual(labels, ['Quantity'])