def testCut(self):
    """Exercise ``cut``: argument validation, result types, tiling, serialization."""
    series = from_pandas_series(pd.Series([1., 2., 3., 4.]), chunk_size=2)

    # Each of these (input, bins) pairs is expected to be rejected.
    rejected_calls = (
        (series, -1),
        ([[1, 2], [3, 4]], 3),
        ([], 3),
    )
    for data, bad_bins in rejected_calls:
        with self.assertRaises(ValueError):
            cut(data, bad_bins)

    # Series input with retbins=True: a series result plus a tensor of bins.
    binned, bin_edges = cut(series, [1.5, 2.5], retbins=True)
    self.assertIsInstance(binned, SERIES_TYPE)
    self.assertIsInstance(bin_edges, TENSOR_TYPE)

    tiled_series = binned.tiles()
    series_chunks = tiled_series.chunks
    self.assertEqual(len(series_chunks), 2)
    for chunk in series_chunks:
        self.assertIsInstance(chunk, SERIES_CHUNK_TYPE)
        self.assertEqual(chunk.shape, (2,))

    # Tensor input: the result is a categorical.
    categorical = cut(series.to_tensor(), [1.5, 2.5])
    self.assertIsInstance(categorical, CATEGORICAL_TYPE)
    self.assertEqual(len(categorical), len(series))
    self.assertIn('Categorical', repr(categorical))

    tiled_categorical = categorical.tiles()
    categorical_chunks = tiled_categorical.chunks
    self.assertEqual(len(categorical_chunks), 2)
    for chunk in categorical_chunks:
        self.assertIsInstance(chunk, CATEGORICAL_CHUNK_TYPE)
        self.assertEqual(chunk.shape, (2,))
        self.assertEqual(chunk.ndim, 1)

    # Round-trip the graph through protobuf, then JSON, and check that the
    # categorical node found in the restored graph keeps its length.
    graph = tiled_categorical.build_graph(tiled=False)
    graph_cls = type(graph)
    restored = graph_cls.from_pb(graph.to_pb())
    restored = graph_cls.from_json(restored.to_json())
    restored_categorical = next(
        node for node in restored if isinstance(node, CATEGORICAL_TYPE)
    )
    self.assertEqual(len(restored_categorical), len(tiled_categorical))

    # labels=False: the result is a tensor whose dtype matches pandas' own.
    codes = cut([0, 1, 1, 2], bins=4, labels=False)
    self.assertIsInstance(codes, TENSOR_TYPE)
    expected = pd.cut([0, 1, 1, 2], bins=4, labels=False)
    self.assertEqual(codes.dtype, expected.dtype)
def test_cut():
    """Exercise ``cut``: argument validation, result types and tiling."""
    series = from_pandas_series(pd.Series([1., 2., 3., 4.]), chunk_size=2)

    # Each of these (input, bins) pairs is expected to be rejected.
    rejected_calls = (
        (series, -1),
        ([[1, 2], [3, 4]], 3),
        ([], 3),
    )
    for data, bad_bins in rejected_calls:
        with pytest.raises(ValueError):
            cut(data, bad_bins)

    # Series input with retbins=True: a series result plus a tensor of bins.
    binned, bin_edges = cut(series, [1.5, 2.5], retbins=True)
    assert isinstance(binned, SERIES_TYPE)
    assert isinstance(bin_edges, TENSOR_TYPE)

    tiled_series = tile(binned)
    series_chunks = tiled_series.chunks
    assert len(series_chunks) == 2
    for chunk in series_chunks:
        assert isinstance(chunk, SERIES_CHUNK_TYPE)
        assert chunk.shape == (2,)

    # Tensor input: the result is a categorical.
    categorical = cut(series.to_tensor(), [1.5, 2.5])
    assert isinstance(categorical, CATEGORICAL_TYPE)
    assert len(categorical) == len(series)
    assert 'Categorical' in repr(categorical)

    tiled_categorical = tile(categorical)
    categorical_chunks = tiled_categorical.chunks
    assert len(categorical_chunks) == 2
    for chunk in categorical_chunks:
        assert isinstance(chunk, CATEGORICAL_CHUNK_TYPE)
        assert chunk.shape == (2,)
        assert chunk.ndim == 1

    # labels=False: the result is a tensor whose dtype matches pandas' own.
    codes = cut([0, 1, 1, 2], bins=4, labels=False)
    assert isinstance(codes, TENSOR_TYPE)
    expected = pd.cut([0, 1, 1, 2], bins=4, labels=False)
    assert codes.dtype == expected.dtype