예제 #1
0
    def testToFrameOrSeries(self):
        """Compare ``to_frame`` / ``to_series`` results with pandas.

        Exercises a named Series, a flat Index and a two-level MultiIndex.
        Each lazy result is executed through ``self.executor`` and checked
        against the output of the equivalent pandas call.
        """
        def run(tileable):
            # Execute the lazy object and take the first concatenated result.
            return self.executor.execute_dataframe(tileable, concat=True)[0]

        # Series -> DataFrame, with the default and an overridden column name.
        pd_series = pd.Series(np.random.rand(10), name='col')
        md_series = Series(pd_series)
        for kwargs in ({}, {'name': 'new_name'}):
            actual = run(md_series.to_frame(**kwargs))
            pd.testing.assert_frame_equal(pd_series.to_frame(**kwargs), actual)

        # Flat Index -> DataFrame.
        pd_index = pd.Index(np.random.rand(10), name='col')
        md_index = Index(pd_index)
        for kwargs in ({}, {'index': False}, {'name': 'new_name'}):
            actual = run(md_index.to_frame(**kwargs))
            pd.testing.assert_frame_equal(pd_index.to_frame(**kwargs), actual)

        # Flat Index -> Series.
        actual = run(md_index.to_series())
        pd.testing.assert_series_equal(pd_index.to_series(), actual)

        # Each side receives its own RangeIndex instance.
        actual = run(md_index.to_series(index=pd.RangeIndex(0, 10)))
        expected = pd_index.to_series(index=pd.RangeIndex(0, 10))
        pd.testing.assert_series_equal(expected, actual)

        actual = run(md_index.to_series(name='new_name'))
        pd.testing.assert_series_equal(
            pd_index.to_series(name='new_name'), actual)

        # Two-level MultiIndex.
        level_pairs = [('A', 'E'), ('B', 'F'), ('C', 'G')]
        pd_multi = pd.MultiIndex.from_tuples(level_pairs)
        md_multi = Index(pd_multi, tupleize_cols=True)

        actual = run(md_multi.to_frame())
        pd.testing.assert_frame_equal(pd_multi.to_frame(), actual)

        # A scalar name is expected to raise TypeError, and three names for
        # two levels ValueError; both are raised by the ``to_frame`` call
        # itself, before anything is executed.
        with self.assertRaises(TypeError):
            md_multi.to_frame(name='XY')
        with self.assertRaises(ValueError):
            md_multi.to_frame(name=['X', 'Y', 'Z'])

        actual = run(md_multi.to_frame(name=['X', 'Y']))
        pd.testing.assert_frame_equal(
            pd_multi.to_frame(name=['X', 'Y']), actual)

        actual = run(md_multi.to_series(name='new_name'))
        pd.testing.assert_series_equal(
            pd_multi.to_series(name='new_name'), actual)
예제 #2
0
파일: test_core.py 프로젝트: h8f/mars
def test_series_params():
    """Check that params of a filtered Series chunk can be filled from data.

    The boolean filter leaves the tiled chunk with NaN entries in its shape
    and a NaN ``index_value.min_val``. After assigning params derived from the
    concrete pandas result, both must be known, and the same params (minus
    ``'index'``) are pushed onto the tileable itself.
    """
    pd_series = pd.Series([1, 2, 3], name='a')
    md_series = Series(pd_series)
    filtered = md_series[md_series < 2]
    tiled = tile(filtered)
    chunk = tiled.chunks[0]

    # Before any concrete data is supplied the size is still unknown.
    assert any(np.isnan(dim) for dim in chunk.params['shape'])
    assert np.isnan(chunk.params['index_value'].min_val)

    concrete = pd_series[pd_series < 2]
    chunk.params = chunk.get_params_from_data(concrete)
    # shape and index_value are now concrete
    assert all(not np.isnan(dim) for dim in chunk.params['shape'])
    assert not np.isnan(chunk.params['index_value'].min_val)

    # Drop the 'index' entry (if present) before reusing the chunk params
    # for the tileable.
    tileable_params = chunk.params.copy()
    tileable_params.pop('index', None)
    tiled.params = tileable_params
    assert np.prod(tiled.shape) > 0
    tiled.refresh_params()
예제 #3
0
파일: test_core.py 프로젝트: h8f/mars
def test_groupby_params():
    """Check that groupby chunk params can be derived from wrapped data.

    Runs the same sequence for a DataFrame grouped by column ``'a'`` and for a
    Series grouped by index level 0: compute chunk params from a
    ``wrapped_groupby`` object, then assign them (minus ``'index'``) to the
    tiled groupby. Only the Series variant calls ``refresh_params``.
    """
    # DataFrame groupby
    pd_frame = pd.DataFrame({'a': [1, 2, 3]})
    df_grouped = tile(DataFrame(pd_frame).groupby('a'))
    df_chunk = df_grouped.chunks[0]

    df_chunk.params = df_chunk.get_params_from_data(
        wrapped_groupby(pd_frame, by='a'))
    df_params = df_chunk.params.copy()
    df_params.pop('index', None)
    df_grouped.params = df_params

    # Series groupby
    pd_series = pd.Series([1, 2, 3], name='a')
    series_grouped = tile(Series(pd_series).groupby(level=0))
    series_chunk = series_grouped.chunks[0]

    series_chunk.params = series_chunk.get_params_from_data(
        wrapped_groupby(pd_series, level=0))
    series_params = series_chunk.params.copy()
    series_params.pop('index', None)
    series_grouped.params = series_params
    series_grouped.refresh_params()
예제 #4
0
파일: test_core.py 프로젝트: h8f/mars
    def testKeyValue(self):
        """Check ``values`` and ``keys()`` of DataFrame, Series and Index.

        Each lazy result is executed through ``self.executor`` and compared
        with the corresponding pandas attribute or method. For Index only
        ``values`` is checked.
        """
        def run(tileable):
            # Execute the lazy object and take the first concatenated result.
            return self.executor.execute_dataframe(tileable, concat=True)[0]

        # DataFrame
        pd_frame = pd.DataFrame(np.random.rand(4, 3), columns=list('ABC'))
        md_frame = DataFrame(pd_frame)

        np.testing.assert_array_equal(run(md_frame.values), pd_frame.values)
        pd.testing.assert_index_equal(run(md_frame.keys()), pd_frame.keys())

        # Series
        pd_series = pd.Series(np.random.rand(10))
        md_series = Series(pd_series)

        np.testing.assert_array_equal(run(md_series.values), pd_series.values)
        pd.testing.assert_index_equal(run(md_series.keys()), pd_series.keys())

        # Index
        pd_index = pd.Index(np.random.rand(10))
        md_index = Index(pd_index)

        np.testing.assert_array_equal(run(md_index.values), pd_index.values)
예제 #5
0
def test_key_value(setup):
    """Check ``values`` and ``keys()`` of DataFrame, Series and Index.

    Every lazy result is materialised with ``.execute().fetch()`` and compared
    with the corresponding pandas attribute or method. For Index only
    ``values`` is checked.

    ``setup`` is not used in the body; presumably a pytest fixture that
    prepares the execution session (confirm against the project's conftest).
    """
    # DataFrame
    pd_frame = pd.DataFrame(np.random.rand(4, 3), columns=list('ABC'))
    md_frame = DataFrame(pd_frame)

    fetched_values = md_frame.values.execute().fetch()
    np.testing.assert_array_equal(fetched_values, pd_frame.values)

    fetched_keys = md_frame.keys().execute().fetch()
    pd.testing.assert_index_equal(fetched_keys, pd_frame.keys())

    # Series
    pd_series = pd.Series(np.random.rand(10))
    md_series = Series(pd_series)

    fetched_values = md_series.values.execute().fetch()
    np.testing.assert_array_equal(fetched_values, pd_series.values)

    fetched_keys = md_series.keys().execute().fetch()
    pd.testing.assert_index_equal(fetched_keys, pd_series.keys())

    # Index
    pd_index = pd.Index(np.random.rand(10))
    md_index = Index(pd_index)

    fetched_values = md_index.values.execute().fetch()
    np.testing.assert_array_equal(fetched_values, pd_index.values)
예제 #6
0
def test_comp(setup):
    """Check the six rich-comparison operators on DataFrame and Series.

    Covers four scenarios:

    * ``==`` on the underlying ``.data`` objects inside
      ``enter_mode(build=True)``;
    * DataFrame vs. DataFrame comparisons, which keep the left operand's
      index and raise ``ValueError`` when index or columns differ;
    * DataFrame vs. a ``datetime`` scalar;
    * Series of periods compared with one of its own elements.

    ``setup`` is not used in the body; presumably a pytest fixture that
    prepares the execution session (confirm against the project's conftest).
    """
    # Single definition of the operators shared by both loops below.
    comparisons = (
        operator.eq, operator.ne, operator.lt,
        operator.gt, operator.le, operator.ge,
    )

    df1 = DataFrame(pd.DataFrame(np.random.rand(4, 3)))
    df2 = DataFrame(pd.DataFrame(np.random.rand(4, 3)))

    with enter_mode(build=True):
        # Deliberately spelled ``not ... ==`` so that ``==`` is the operator
        # under test in both assertions.
        assert not df1.data == df2.data
        assert df1.data == df1.data

    # Mismatching operands do not depend on the operator, so build them once
    # instead of once per loop iteration.
    # index not identical
    df3 = DataFrame(pd.DataFrame(np.random.rand(4, 3), index=[1, 2, 3, 4]))
    # columns not identical
    df4 = DataFrame(
        pd.DataFrame(np.random.rand(4, 3), columns=['a', 'b', 'c']))

    for op in comparisons:
        eq_df = op(df1, df2)
        pd.testing.assert_index_equal(eq_df.index_value.to_pandas(),
                                      df1.index_value.to_pandas())

        with pytest.raises(ValueError):
            op(df1, df3)

        with pytest.raises(ValueError):
            op(df1, df4)

    # test datetime
    df = DataFrame(pd.DataFrame(pd.date_range('20130101', periods=6)))
    for op in comparisons:
        r_df = op(df, datetime(2013, 1, 2))
        pd.testing.assert_index_equal(r_df.index_value.to_pandas(),
                                      df.index_value.to_pandas())

    # test period type
    raw = pd.period_range("2000-01-01", periods=10, freq="D")
    raw_series = pd.Series(raw)
    series = Series(raw, chunk_size=5)
    r = series >= series[1]
    pd.testing.assert_series_equal(r.to_pandas(), raw_series >= raw_series[1])
예제 #7
0
파일: test_core.py 프로젝트: h8f/mars
def test_index_params():
    """Check that params of a filtered Series' index chunk can be filled in.

    The index of a boolean-filtered Series starts with NaN entries in its
    chunk shape and a NaN ``index_value.min_val``. After assigning params
    derived from the concrete pandas index, both must be known, and the same
    params (minus ``'index'``) are pushed onto the tiled index.
    """
    pd_series = pd.Series([1, 2, 3], name='a')
    pd_series.index.name = 'b'
    md_series = Series(pd_series)
    filtered = md_series[md_series < 2]
    tiled_index = tile(filtered.index)
    chunk = tiled_index.chunks[0]

    # Before any concrete data is supplied the size is still unknown.
    assert any(np.isnan(dim) for dim in chunk.params['shape'])
    assert np.isnan(chunk.params['index_value'].min_val)

    concrete_index = pd_series[pd_series < 2].index
    chunk.params = chunk.get_params_from_data(concrete_index)
    # shape and index_value are now concrete
    assert all(not np.isnan(dim) for dim in chunk.params['shape'])
    assert not np.isnan(chunk.params['index_value'].min_val)

    # Drop the 'index' entry (if present) before reusing the chunk params
    # for the tileable.
    tileable_params = chunk.params.copy()
    tileable_params.pop('index', None)
    tiled_index.params = tileable_params
    assert np.prod(tiled_index.shape) > 0
    tiled_index.refresh_params()
예제 #8
0
def test_to_frame_or_series(setup):
    """Compare ``to_frame`` / ``to_series`` results with pandas.

    Exercises a named Series (including a boolean-filtered one), a flat Index
    and a two-level MultiIndex. Each lazy result is materialised with
    ``.execute().fetch()`` and checked against the equivalent pandas call.

    ``setup`` is not used in the body; presumably a pytest fixture that
    prepares the execution session (confirm against the project's conftest).
    """
    # Series -> DataFrame, with the default and an overridden column name.
    pd_series = pd.Series(np.random.rand(10), name='col')
    md_series = Series(pd_series)
    for kwargs in ({}, {'name': 'new_name'}):
        actual = md_series.to_frame(**kwargs).execute().fetch()
        pd.testing.assert_frame_equal(pd_series.to_frame(**kwargs), actual)

    # Same conversion after a boolean filter on the Series.
    md_filtered = md_series[md_series > 0.1]
    actual = md_filtered.to_frame(name='new_name').execute().fetch()
    expected = pd_series[pd_series > 0.1].to_frame(name='new_name')
    pd.testing.assert_frame_equal(expected, actual)

    # Flat Index -> DataFrame.
    pd_index = pd.Index(np.random.rand(10), name='col')
    md_index = Index(pd_index)
    for kwargs in ({}, {'index': False}, {'name': 'new_name'}):
        actual = md_index.to_frame(**kwargs).execute().fetch()
        pd.testing.assert_frame_equal(pd_index.to_frame(**kwargs), actual)

    # Flat Index -> Series.
    actual = md_index.to_series().execute().fetch()
    pd.testing.assert_series_equal(pd_index.to_series(), actual)

    # Each side receives its own RangeIndex instance.
    actual = md_index.to_series(index=pd.RangeIndex(0, 10)).execute().fetch()
    expected = pd_index.to_series(index=pd.RangeIndex(0, 10))
    pd.testing.assert_series_equal(expected, actual)

    actual = md_index.to_series(name='new_name').execute().fetch()
    pd.testing.assert_series_equal(pd_index.to_series(name='new_name'), actual)

    # Two-level MultiIndex.
    level_pairs = [('A', 'E'), ('B', 'F'), ('C', 'G')]
    pd_multi = pd.MultiIndex.from_tuples(level_pairs)
    md_multi = Index(pd_multi, tupleize_cols=True)

    actual = md_multi.to_frame().execute().fetch()
    pd.testing.assert_frame_equal(pd_multi.to_frame(), actual)

    # A scalar name is expected to raise TypeError, and three names for two
    # levels ValueError; both are raised by the ``to_frame`` call itself,
    # before anything is executed.
    with pytest.raises(TypeError):
        md_multi.to_frame(name='XY')
    with pytest.raises(ValueError):
        md_multi.to_frame(name=['X', 'Y', 'Z'])

    actual = md_multi.to_frame(name=['X', 'Y']).execute().fetch()
    pd.testing.assert_frame_equal(pd_multi.to_frame(name=['X', 'Y']), actual)

    actual = md_multi.to_series(name='new_name').execute().fetch()
    pd.testing.assert_series_equal(pd_multi.to_series(name='new_name'), actual)