コード例 #1
0
ファイル: test_datasource.py プロジェクト: ueshin/mars
    def testFromTileableIndex(self):
        """Check index objects obtained from a DataFrame, a Series and a tensor.

        Covers the index type, dtype, name and index value both before and
        after tiling, plus the ValueError raised for a (10, 4) tensor.
        """
        # a (10, 4) tensor is expected to be rejected
        tensor = mt.random.rand(10, 4)
        self.assertRaises(ValueError, from_tileable, tensor)

        raw_df = pd.DataFrame(np.random.rand(10, 4),
                              index=np.arange(10, 0, -1).astype(np.int64))
        raw_df.index.name = 'ind'
        mars_df = from_pandas_df(raw_df, chunk_size=6)

        # the same expectations apply to the frame and to its column 0
        for source in (mars_df, mars_df[0]):
            idx = source.index
            self.assertIsInstance(idx, Int64Index)
            self.assertEqual(idx.dtype, np.int64)
            self.assertEqual(idx.name, raw_df.index.name)
            self.assertIsInstance(idx.index_value.value,
                                  IndexValue.Int64Index)

            idx = idx.tiles()

            # 10 rows with chunk_size=6 are expected to give two chunks
            self.assertEqual(len(idx.chunks), 2)
            for chunk in idx.chunks:
                self.assertEqual(chunk.dtype, np.int64)
                self.assertEqual(chunk.name, raw_df.index.name)
                self.assertIsInstance(chunk.index_value.value,
                                      IndexValue.Int64Index)

        # an index built from a length-10 tensor, given an explicit name
        tensor = mt.random.rand(10, chunk_size=6)
        idx = from_tileable(tensor, name='new_name')

        self.assertIsInstance(idx, Float64Index)
        self.assertEqual(idx.dtype, np.float64)
        self.assertEqual(idx.name, 'new_name')
        self.assertIsInstance(idx.index_value.value, IndexValue.Float64Index)

        idx = idx.tiles()

        self.assertEqual(len(idx.chunks), 2)
        for chunk in idx.chunks:
            self.assertEqual(chunk.dtype, np.float64)
            self.assertEqual(chunk.name, 'new_name')
            self.assertIsInstance(chunk.index_value.value,
                                  IndexValue.Float64Index)
コード例 #2
0
ファイル: test_datasource.py プロジェクト: haijohn/mars
def test_from_tileable_index():
    """Check index objects obtained from a DataFrame, a Series and a tensor.

    Verifies type, dtype, name and index value before and after ``tile``,
    and that ``from_tileable`` raises ValueError for a (10, 4) tensor.
    """
    # a (10, 4) tensor is expected to be rejected
    tensor = mt.random.rand(10, 4)
    with pytest.raises(ValueError):
        from_tileable(tensor)

    raw_df = pd.DataFrame(
        np.random.rand(10, 4),
        index=np.arange(10, 0, -1).astype(np.int64),
    )
    raw_df.index.name = 'ind'
    mars_df = from_pandas_df(raw_df, chunk_size=6)

    # the same expectations apply to the frame and to its column 0
    for source in (mars_df, mars_df[0]):
        idx = source.index
        assert isinstance(idx, Int64Index)
        assert idx.dtype == np.int64
        assert idx.name == raw_df.index.name
        assert isinstance(idx.index_value.value, IndexValue.Int64Index)

        idx = tile(idx)

        # 10 rows with chunk_size=6 are expected to give two chunks
        assert len(idx.chunks) == 2
        for chunk in idx.chunks:
            assert chunk.dtype == np.int64
            assert chunk.name == raw_df.index.name
            assert isinstance(chunk.index_value.value, IndexValue.Int64Index)

    # an index built from a length-10 tensor, given an explicit name
    tensor = mt.random.rand(10, chunk_size=6)
    idx = from_tileable(tensor, name='new_name')

    assert isinstance(idx, Float64Index)
    assert idx.dtype == np.float64
    assert idx.name == 'new_name'
    assert isinstance(idx.index_value.value, IndexValue.Float64Index)

    idx = tile(idx)

    assert len(idx.chunks) == 2
    for chunk in idx.chunks:
        assert chunk.dtype == np.float64
        assert chunk.name == 'new_name'
        assert isinstance(chunk.index_value.value, IndexValue.Float64Index)
コード例 #3
0
    def testIndexExecution(self):
        """Execute index objects and compare them with their pandas equivalents.

        Covers a DataFrame's index and columns, the index of a filtered
        DataFrame, the index of two Series, and an index built from a tensor.
        """
        def run(tileable):
            # execute through the test executor and take the concatenated result
            return self.executor.execute_dataframe(tileable, concat=True)[0]

        rng = np.random.RandomState(0)
        raw_df = pd.DataFrame(rng.rand(20, 10),
                              index=np.arange(20, 0, -1),
                              columns=['a' + str(i) for i in range(10)])
        mars_df = from_pandas_df(raw_df, chunk_size=13)

        # row labels of the frame
        pd.testing.assert_index_equal(run(mars_df.index), raw_df.index)

        # column labels of the frame
        pd.testing.assert_index_equal(run(mars_df.columns), raw_df.columns)

        # after filtering, the frame has unknown chunk shape on axis 0
        mars_df = mars_df[mars_df.a1 < 0.5]

        pd.testing.assert_index_equal(run(mars_df.index),
                                      raw_df[raw_df.a1 < 0.5].index)

        # series.index which has value (explicit RangeIndex)
        raw_series = pd.Series(raw_df['a1'], index=pd.RangeIndex(20))
        mars_series = from_pandas_series(raw_series, chunk_size=13)

        pd.testing.assert_index_equal(run(mars_series.index),
                                      raw_series.index)

        # series.index taken over from the source frame
        raw_series = raw_df['a2']
        mars_series = from_pandas_series(raw_series, chunk_size=13)

        pd.testing.assert_index_equal(run(mars_series.index),
                                      raw_series.index)

        # index created from a tensor
        values = rng.random(20)
        tensor = mt.tensor(values, chunk_size=13)

        pd.testing.assert_index_equal(run(from_tileable(tensor)),
                                      pd.Index(values))