예제 #1
0
    def testFromTensor(self):
        """Build DataFrames/Series from tensors and verify their metadata.

        Covers ``dataframe_from_tensor``, ``series_from_tensor`` and
        ``dataframe_from_1d_tensors``: index/columns/dtype metadata before
        and after tiling, plus the argument combinations that must be
        rejected with ``TypeError`` or ``ValueError``.
        """
        raw = mt.random.rand(10, 10, chunk_size=5)
        frame = dataframe_from_tensor(raw)
        self.assertIsInstance(frame.index_value._index_value,
                              IndexValue.RangeIndex)
        self.assertEqual(
            frame.op.dtypes[0], raw.dtype,
            'DataFrame converted from tensor have the wrong dtype')

        # a 10x10 tensor with chunk_size=5 is expected to tile into 4 chunks
        frame = frame.tiles()
        self.assertEqual(len(frame.chunks), 4)
        self.assertIsInstance(frame.chunks[0].index_value._index_value,
                              IndexValue.RangeIndex)
        self.assertIsInstance(frame.chunks[0].index_value, IndexValue)

        # conversion from a 1-d tensor and from its (3, 1) transposed form
        vector = mt.array([1, 2, 3])
        column = mt.array([vector]).T
        converted = [dataframe_from_tensor(src) for src in (vector, column)]
        tiled = [item.tiles() for item in converted]
        for item in tiled:
            np.testing.assert_equal(item.chunks[0].index, (0, 0))

        # a 0-d tensor cannot be converted
        zero_dim = mt.array(1)
        np.testing.assert_equal(zero_dim.ndim, 0)
        self.assertRaises(TypeError, dataframe_from_tensor, zero_dim)

        # explicit index: chunks 0/1 carry labels 0..8, chunks 2/3 carry 10..18
        frame = dataframe_from_tensor(raw, index=np.arange(0, 20, 2)).tiles()
        for pos, start in enumerate((0, 0, 10, 10)):
            pd.testing.assert_index_equal(
                frame.chunks[pos].index_value.to_pandas(),
                pd.Index(np.arange(start, start + 10, 2)))

        # index supplied as a tensor: it becomes a second chunk input and the
        # chunk index value is reported as not having a value
        frame = dataframe_from_tensor(raw, index=mt.arange(0, 20, 2)).tiles()
        self.assertEqual(len(frame.chunks[0].inputs), 2)
        self.assertFalse(frame.chunks[0].index_value.has_value())

        # explicit columns: chunks alternate between the two column halves
        frame = dataframe_from_tensor(raw, columns=list('abcdefghij')).tiles()
        pd.testing.assert_index_equal(frame.dtypes.index,
                                      pd.Index(list('abcdefghij')))
        first_half, second_half = list('abcde'), list('fghij')
        chunk_labels = (first_half, second_half, first_half, second_half)
        for pos, labels in enumerate(chunk_labels):
            pd.testing.assert_index_equal(
                frame.chunks[pos].columns_value.to_pandas(),
                pd.Index(labels))
            pd.testing.assert_index_equal(frame.chunks[pos].dtypes.index,
                                          pd.Index(labels))

        # series built from a 1-d tensor
        raw = mt.random.rand(10, chunk_size=4)
        ser = series_from_tensor(raw, name='a')

        self.assertEqual(ser.dtype, raw.dtype)
        self.assertEqual(ser.name, 'a')
        pd.testing.assert_index_equal(ser.index_value.to_pandas(),
                                      pd.RangeIndex(10))

        # 10 elements with chunk_size=4 -> chunks covering [0,4), [4,8), [8,10)
        ser = ser.tiles()
        self.assertEqual(len(ser.chunks), 3)
        for pos, (start, stop) in enumerate(((0, 4), (4, 8), (8, 10))):
            pd.testing.assert_index_equal(
                ser.chunks[pos].index_value.to_pandas(),
                pd.RangeIndex(start, stop))
            self.assertEqual(ser.chunks[pos].name, 'a')

        # DataFrame assembled from a list of 1-d tensors
        pieces = [mt.tensor(np.random.rand(4)) for _ in range(2)]
        frame = dataframe_from_1d_tensors(pieces)
        pd.testing.assert_index_equal(frame.columns_value.to_pandas(),
                                      pd.RangeIndex(2))

        frame = frame.tiles()

        pd.testing.assert_index_equal(
            frame.chunks[0].index_value.to_pandas(), pd.RangeIndex(4))

        # series index handling: default, list and named pandas Index
        ser = series_from_tensor(mt.random.rand(4))
        pd.testing.assert_index_equal(ser.index_value.to_pandas(),
                                      pd.RangeIndex(4))

        ser = series_from_tensor(mt.random.rand(4), index=[1, 2, 3])
        pd.testing.assert_index_equal(ser.index_value.to_pandas(),
                                      pd.Index([1, 2, 3]))

        named_index = pd.Index([1, 2, 3], name='my_index')
        ser = series_from_tensor(mt.random.rand(4), index=named_index)
        pd.testing.assert_index_equal(ser.index_value.to_pandas(),
                                      pd.Index([1, 2, 3], name='my_index'))
        self.assertEqual(ser.index_value.name, 'my_index')

        # a 2-d tensor cannot become a series
        with self.assertRaises(TypeError):
            series_from_tensor(mt.ones((10, 10)))

        # Each thunk builds its arguments lazily so that argument construction
        # happens inside the assertRaises context, as with an inline call.
        invalid_calls = (
            # index has wrong shape
            lambda: dataframe_from_tensor(mt.random.rand(4, 3),
                                          index=mt.random.rand(5)),
            # columns have wrong shape
            lambda: dataframe_from_tensor(mt.random.rand(4, 3),
                                          columns=['a', 'b']),
            # index should be 1-d
            lambda: dataframe_from_tensor(
                mt.tensor(np.random.rand(3, 2)),
                index=mt.tensor(np.random.rand(3, 2))),
            # 1-d tensors should have same shape
            lambda: dataframe_from_1d_tensors(
                [mt.tensor(np.random.rand(3)),
                 mt.tensor(np.random.rand(2))]),
            # index has wrong shape
            lambda: dataframe_from_1d_tensors(
                [mt.tensor(np.random.rand(3))],
                index=mt.tensor(np.random.rand(2))),
            # columns have wrong shape
            lambda: dataframe_from_1d_tensors(
                [mt.tensor(np.random.rand(3))],
                columns=['a', 'b']),
            # index should be 1-d
            lambda: series_from_tensor(mt.random.rand(4),
                                       index=mt.random.rand(4, 3)),
        )
        for invalid_call in invalid_calls:
            with self.assertRaises(ValueError):
                invalid_call()
예제 #2
0
    def testFromTensor(self):
        """Build DataFrames/Series from tensors and verify their metadata.

        Covers ``dataframe_from_tensor`` and ``series_from_tensor``: index,
        columns and dtype metadata before and after tiling, plus the inputs
        that must be rejected with ``TypeError``.
        """
        raw = mt.random.rand(10, 10, chunk_size=5)
        frame = dataframe_from_tensor(raw)
        self.assertIsInstance(frame.index_value._index_value, IndexValue.RangeIndex)
        self.assertEqual(frame.op.dtypes[0], raw.dtype, 'DataFrame converted from tensor have the wrong dtype')

        # NOTE(review): the result of tiles() is discarded throughout this
        # test and .chunks is read from the same object afterwards --
        # presumably tiles() populates the object in place; confirm.
        frame.tiles()
        # a 10x10 tensor with chunk_size=5 is expected to tile into 4 chunks
        self.assertEqual(len(frame.chunks), 4)
        self.assertIsInstance(frame.chunks[0].index_value._index_value, IndexValue.RangeIndex)
        self.assertIsInstance(frame.chunks[0].index_value, IndexValue)

        # conversion from a 1-d tensor and from its (3, 1) transposed form
        vector = mt.array([1, 2, 3])
        column = mt.array([vector]).T
        converted = [dataframe_from_tensor(src) for src in (vector, column)]
        for item in converted:
            item.tiles()
        for item in converted:
            np.testing.assert_equal(item.chunks[0].index, (0, 0))

        # a 0-d tensor cannot be converted
        zero_dim = mt.array(1)
        np.testing.assert_equal(zero_dim.ndim, 0)
        self.assertRaises(TypeError, dataframe_from_tensor, zero_dim)

        # explicit index: chunks 0/1 carry labels 0..8, chunks 2/3 carry 10..18
        frame = dataframe_from_tensor(raw, index=np.arange(0, 20, 2))
        frame.tiles()
        for pos, start in enumerate((0, 0, 10, 10)):
            pd.testing.assert_index_equal(
                frame.chunks[pos].index_value.to_pandas(),
                pd.Index(np.arange(start, start + 10, 2)))

        # explicit columns: chunks alternate between the two column halves
        frame = dataframe_from_tensor(raw, columns=list('abcdefghij'))
        frame.tiles()
        first_half, second_half = list('abcde'), list('fghij')
        for pos, labels in enumerate((first_half, second_half, first_half, second_half)):
            pd.testing.assert_index_equal(
                frame.chunks[pos].columns_value.to_pandas(),
                pd.Index(labels))

        # series built from a 1-d tensor
        raw = mt.random.rand(10, chunk_size=4)
        ser = series_from_tensor(raw, name='a')

        self.assertEqual(ser.dtype, raw.dtype)
        self.assertEqual(ser.name, 'a')
        pd.testing.assert_index_equal(ser.index_value.to_pandas(), pd.RangeIndex(10))

        # 10 elements with chunk_size=4 -> chunks covering [0,4), [4,8), [8,10)
        ser.tiles()
        self.assertEqual(len(ser.chunks), 3)
        for pos, (start, stop) in enumerate(((0, 4), (4, 8), (8, 10))):
            pd.testing.assert_index_equal(
                ser.chunks[pos].index_value.to_pandas(),
                pd.RangeIndex(start, stop))
            self.assertEqual(ser.chunks[pos].name, 'a')

        # a 2-d tensor cannot become a series
        with self.assertRaises(TypeError):
            series_from_tensor(mt.ones((10, 10)))
예제 #3
0
def test_from_tensor():
    """Build DataFrames/Series from tensors and verify their metadata.

    Covers ``dataframe_from_tensor``, ``series_from_tensor`` and
    ``dataframe_from_1d_tileables``: index/columns/dtype metadata before and
    after ``tile``, plus the argument combinations that must be rejected with
    ``TypeError`` or ``ValueError``.
    """
    raw = mt.random.rand(10, 10, chunk_size=5)
    frame = dataframe_from_tensor(raw)
    assert isinstance(frame.index_value._index_value, IndexValue.RangeIndex)
    assert frame.op.dtypes[0] == raw.dtype

    # a 10x10 tensor with chunk_size=5 is expected to tile into 4 chunks
    frame = tile(frame)
    assert len(frame.chunks) == 4
    assert isinstance(frame.chunks[0].index_value._index_value, IndexValue.RangeIndex)
    assert isinstance(frame.chunks[0].index_value, IndexValue)

    # conversion from a 1-d tensor and from its (3, 1) transposed form
    vector = mt.array([1, 2, 3])
    column = mt.array([vector]).T
    converted = [dataframe_from_tensor(src) for src in (vector, column)]
    tiled = [tile(item) for item in converted]
    for item in tiled:
        np.testing.assert_equal(item.chunks[0].index, (0, 0))

    # a 0-d tensor cannot be converted
    zero_dim = mt.array(1)
    np.testing.assert_equal(zero_dim.ndim, 0)
    with pytest.raises(TypeError):
        dataframe_from_tensor(zero_dim)

    # explicit index: chunks 0/1 carry labels 0..8, chunks 2/3 carry 10..18
    frame = tile(dataframe_from_tensor(raw, index=np.arange(0, 20, 2)))
    for pos, start in enumerate((0, 0, 10, 10)):
        pd.testing.assert_index_equal(
            frame.chunks[pos].index_value.to_pandas(),
            pd.Index(np.arange(start, start + 10, 2)))

    # index supplied as a tensor: it becomes a second chunk input and the
    # chunk index value is reported as not having a value
    frame = tile(dataframe_from_tensor(raw, index=mt.arange(0, 20, 2)))
    assert len(frame.chunks[0].inputs) == 2
    assert frame.chunks[0].index_value.has_value() is False

    # explicit columns: chunks alternate between the two column halves
    frame = tile(dataframe_from_tensor(raw, columns=list('abcdefghij')))
    pd.testing.assert_index_equal(frame.dtypes.index, pd.Index(list('abcdefghij')))
    first_half, second_half = list('abcde'), list('fghij')
    for pos, labels in enumerate((first_half, second_half, first_half, second_half)):
        pd.testing.assert_index_equal(
            frame.chunks[pos].columns_value.to_pandas(), pd.Index(labels))
        pd.testing.assert_index_equal(
            frame.chunks[pos].dtypes.index, pd.Index(labels))

    # series built from a 1-d tensor
    raw = mt.random.rand(10, chunk_size=4)
    ser = series_from_tensor(raw, name='a')

    assert ser.dtype == raw.dtype
    assert ser.name == 'a'
    pd.testing.assert_index_equal(ser.index_value.to_pandas(), pd.RangeIndex(10))

    # 10 elements with chunk_size=4 -> chunks covering [0,4), [4,8), [8,10)
    ser = tile(ser)
    assert len(ser.chunks) == 3
    for pos, (start, stop) in enumerate(((0, 4), (4, 8), (8, 10))):
        pd.testing.assert_index_equal(
            ser.chunks[pos].index_value.to_pandas(),
            pd.RangeIndex(start, stop))
        assert ser.chunks[pos].name == 'a'

    # DataFrame assembled from an ordered mapping of 1-d tensors
    pieces = OrderedDict((key, mt.tensor(np.random.rand(4))) for key in range(2))
    frame = dataframe_from_1d_tileables(pieces)
    pd.testing.assert_index_equal(frame.columns_value.to_pandas(), pd.RangeIndex(2))

    frame = tile(frame)

    pd.testing.assert_index_equal(frame.chunks[0].index_value.to_pandas(), pd.RangeIndex(4))

    # series index handling: default, list and named pandas Index
    ser = series_from_tensor(mt.random.rand(4))
    pd.testing.assert_index_equal(ser.index_value.to_pandas(), pd.RangeIndex(4))

    ser = series_from_tensor(mt.random.rand(4), index=[1, 2, 3])
    pd.testing.assert_index_equal(ser.index_value.to_pandas(), pd.Index([1, 2, 3]))

    named_index = pd.Index([1, 2, 3], name='my_index')
    ser = series_from_tensor(mt.random.rand(4), index=named_index)
    pd.testing.assert_index_equal(ser.index_value.to_pandas(), pd.Index([1, 2, 3], name='my_index'))
    assert ser.index_value.name == 'my_index'

    # a 2-d tensor cannot become a series
    with pytest.raises(TypeError):
        series_from_tensor(mt.ones((10, 10)))

    # Each thunk builds its arguments lazily so that argument construction
    # happens inside the pytest.raises context, as with an inline call.
    invalid_calls = (
        # index has wrong shape
        lambda: dataframe_from_tensor(mt.random.rand(4, 3), index=mt.random.rand(5)),
        # columns have wrong shape
        lambda: dataframe_from_tensor(mt.random.rand(4, 3), columns=['a', 'b']),
        # index should be 1-d
        lambda: dataframe_from_tensor(mt.tensor(np.random.rand(3, 2)),
                                      index=mt.tensor(np.random.rand(3, 2))),
        # 1-d tensors should have same shape
        lambda: dataframe_from_1d_tileables(
            OrderedDict([(0, mt.tensor(np.random.rand(3))),
                         (1, mt.tensor(np.random.rand(2)))])),
        # index has wrong shape
        lambda: dataframe_from_1d_tileables({0: mt.tensor(np.random.rand(3))},
                                            index=mt.tensor(np.random.rand(2))),
        # columns have wrong shape
        lambda: dataframe_from_1d_tileables({0: mt.tensor(np.random.rand(3))},
                                            columns=['a', 'b']),
        # index should be 1-d
        lambda: series_from_tensor(mt.random.rand(4), index=mt.random.rand(4, 3)),
    )
    for invalid_call in invalid_calls:
        with pytest.raises(ValueError):
            invalid_call()