Example #1
0
    def testCut(self):
        # Exercises cut(): rejection of invalid arguments, the result types
        # produced for series / tensor / list inputs, tiling into chunks, and
        # a graph serialization round trip.
        series = from_pandas_series(pd.Series([1., 2., 3., 4.]), chunk_size=2)

        # every (input, bins) pair below is expected to raise ValueError
        invalid_calls = (
            (series, -1),
            ([[1, 2], [3, 4]], 3),
            ([], 3),
        )
        for bad_input, bad_bins in invalid_calls:
            with self.assertRaises(ValueError):
                cut(bad_input, bad_bins)

        # series input with retbins=True -> (series, tensor)
        binned, edges = cut(series, [1.5, 2.5], retbins=True)
        self.assertIsInstance(binned, SERIES_TYPE)
        self.assertIsInstance(edges, TENSOR_TYPE)

        tiled_series = binned.tiles()

        self.assertEqual(len(tiled_series.chunks), 2)
        for series_chunk in tiled_series.chunks:
            self.assertIsInstance(series_chunk, SERIES_CHUNK_TYPE)
            self.assertEqual(series_chunk.shape, (2, ))

        # tensor input -> categorical
        categorical = cut(series.to_tensor(), [1.5, 2.5])
        self.assertIsInstance(categorical, CATEGORICAL_TYPE)
        self.assertEqual(len(categorical), len(series))
        self.assertIn('Categorical', repr(categorical))

        tiled_categorical = categorical.tiles()

        self.assertEqual(len(tiled_categorical.chunks), 2)
        for cat_chunk in tiled_categorical.chunks:
            self.assertIsInstance(cat_chunk, CATEGORICAL_CHUNK_TYPE)
            self.assertEqual(cat_chunk.shape, (2, ))
            self.assertEqual(cat_chunk.ndim, 1)

        # test serialize: round trip the untiled graph through pb, then json
        graph = tiled_categorical.build_graph(tiled=False)
        from_pb_graph = type(graph).from_pb(graph.to_pb())
        restored_graph = type(graph).from_json(from_pb_graph.to_json())
        restored = next(
            node for node in restored_graph
            if isinstance(node, CATEGORICAL_TYPE)
        )
        self.assertEqual(len(restored), len(tiled_categorical))

        # labels=False -> tensor whose dtype matches what pandas produces
        raw_values = [0, 1, 1, 2]
        codes = cut(raw_values, bins=4, labels=False)
        self.assertIsInstance(codes, TENSOR_TYPE)
        expected = pd.cut([0, 1, 1, 2], bins=4, labels=False)
        self.assertEqual(codes.dtype, expected.dtype)
Example #2
0
def test_cut():
    """Check ``cut`` argument validation, result types and tiling.

    Covers a chunked series input (with ``retbins=True``), a tensor input
    that yields a categorical, and a plain list input with ``labels=False``
    whose dtype is compared against ``pd.cut``.
    """
    series = from_pandas_series(pd.Series([1., 2., 3., 4.]), chunk_size=2)

    # -1 passed as bins
    with pytest.raises(ValueError):
        cut(series, -1)

    # nested (2-d) list input
    with pytest.raises(ValueError):
        cut([[1, 2], [3, 4]], 3)

    # empty input
    with pytest.raises(ValueError):
        cut([], 3)

    # series in, (series, tensor) out when retbins=True
    binned, edges = cut(series, [1.5, 2.5], retbins=True)
    assert isinstance(binned, SERIES_TYPE)
    assert isinstance(edges, TENSOR_TYPE)

    tiled_series = tile(binned)

    assert len(tiled_series.chunks) == 2
    for series_chunk in tiled_series.chunks:
        assert isinstance(series_chunk, SERIES_CHUNK_TYPE)
        assert series_chunk.shape == (2,)

    # tensor in, categorical out
    categorical = cut(series.to_tensor(), [1.5, 2.5])
    assert isinstance(categorical, CATEGORICAL_TYPE)
    assert len(categorical) == len(series)
    assert 'Categorical' in repr(categorical)

    tiled_categorical = tile(categorical)

    assert len(tiled_categorical.chunks) == 2
    for cat_chunk in tiled_categorical.chunks:
        assert isinstance(cat_chunk, CATEGORICAL_CHUNK_TYPE)
        assert cat_chunk.shape == (2,)
        assert cat_chunk.ndim == 1

    # labels=False yields a tensor; its dtype must agree with pandas
    codes = cut([0, 1, 1, 2], bins=4, labels=False)
    assert isinstance(codes, TENSOR_TYPE)
    expected = pd.cut([0, 1, 1, 2], bins=4, labels=False)
    assert codes.dtype == expected.dtype