コード例 #1
0
ファイル: test_statistics.py プロジェクト: qinxuye/mars
def test_series_quantile():
    """Check the type and metadata of ``quantile`` results on a chunked series.

    Called without arguments, ``quantile`` must yield a ``Tensor``.  Called
    with a list of two quantiles it must yield a ``Series`` of shape ``(2,)``
    whose index holds those quantiles.  Each result is also passed to
    ``tile``.
    """
    data = pd.Series(np.random.rand(10))

    # Default quantile.
    chunked = series_from_pandas(data, chunk_size=3)
    single = chunked.quantile()
    assert isinstance(single, Tensor)
    tile(single)

    # Explicit list of quantiles, on a freshly built chunked series.
    chunked = series_from_pandas(data, chunk_size=3)
    several = chunked.quantile([0.3, 0.7])
    assert isinstance(several, Series)
    assert several.shape == (2, )
    quantile_index = several.index_value.to_pandas()
    pd.testing.assert_index_equal(quantile_index, pd.Index([0.3, 0.7]))
    tile(several)
コード例 #2
0
    def testSeriesQuantile(self):
        """Check the type and metadata of ``quantile`` results on a chunked series.

        Called without arguments, ``quantile`` must yield a ``Tensor``.
        Called with a list of two quantiles it must yield a ``Series`` of
        shape ``(2,)`` whose index holds those quantiles.  ``tiles()`` is
        invoked on each result.
        """
        data = pd.Series(np.random.rand(10))

        # Default quantile.
        chunked = series_from_pandas(data, chunk_size=3)
        single = chunked.quantile()
        self.assertIsInstance(single, Tensor)
        single.tiles()

        # Explicit list of quantiles, on a freshly built chunked series.
        chunked = series_from_pandas(data, chunk_size=3)
        several = chunked.quantile([0.3, 0.7])
        self.assertIsInstance(several, Series)
        self.assertEqual(several.shape, (2, ))
        quantile_index = several.index_value.to_pandas()
        pd.testing.assert_index_equal(quantile_index, pd.Index([0.3, 0.7]))
        several.tiles()
コード例 #3
0
    def testAppendExecution(self):
        """Execute ``append`` on chunked frames/series and compare with pandas.

        Covers equal and differing chunk sizes, ``ignore_index=True``,
        appending a list of objects, and appending a plain dict as a row.

        The reference values are built with ``pd.concat`` instead of pandas'
        own ``DataFrame.append`` / ``Series.append``: those methods were
        deprecated in pandas 1.4 and removed in pandas 2.0, and ``pd.concat``
        is their documented replacement.  Only the expected side changes;
        the ``append`` under test is still the chunked objects' method.
        """
        # NOTE(review): the ``ignore_index=True`` and dict cases run on this
        # context-backed executor while the rest use ``self.executor``; the
        # split is kept exactly as found -- confirm whether it is required.
        executor = ExecutorForTest(storage=new_session().context)

        df1 = pd.DataFrame(np.random.rand(10, 4), columns=list('ABCD'))
        df2 = pd.DataFrame(np.random.rand(10, 4), columns=list('ABCD'))

        # Same chunk size on both sides.
        mdf1 = from_pandas(df1, chunk_size=3)
        mdf2 = from_pandas(df2, chunk_size=3)

        adf = mdf1.append(mdf2)
        expected = pd.concat([df1, df2])
        result = self.executor.execute_dataframe(adf, concat=True)[0]
        pd.testing.assert_frame_equal(expected, result)

        adf = mdf1.append(mdf2, ignore_index=True)
        expected = pd.concat([df1, df2], ignore_index=True)
        result = executor.execute_dataframe(adf, concat=True)[0]
        pd.testing.assert_frame_equal(expected, result)

        # Different chunk sizes.
        mdf1 = from_pandas(df1, chunk_size=3)
        mdf2 = from_pandas(df2, chunk_size=2)

        adf = mdf1.append(mdf2)
        expected = pd.concat([df1, df2])
        result = self.executor.execute_dataframe(adf, concat=True)[0]
        pd.testing.assert_frame_equal(expected, result)

        adf = mdf1.append(mdf2, ignore_index=True)
        expected = pd.concat([df1, df2], ignore_index=True)
        result = executor.execute_dataframe(adf, concat=True)[0]
        pd.testing.assert_frame_equal(expected, result)

        # Appending a list of frames.
        df3 = pd.DataFrame(np.random.rand(8, 4), columns=list('ABCD'))
        mdf3 = from_pandas(df3, chunk_size=3)
        expected = pd.concat([df1, df2, df3])
        adf = mdf1.append([mdf2, mdf3])
        result = self.executor.execute_dataframe(adf, concat=True)[0]
        pd.testing.assert_frame_equal(expected, result)

        # Appending a dict as a single row; the pandas reference wraps the
        # dict in a one-row DataFrame before concatenating.
        adf = mdf1.append(dict(A=1, B=2, C=3, D=4), ignore_index=True)
        expected = pd.concat([df1, pd.DataFrame([dict(A=1, B=2, C=3, D=4)])],
                             ignore_index=True)
        result = executor.execute_dataframe(adf, concat=True)[0]
        pd.testing.assert_frame_equal(expected, result)

        # test for series
        series1 = pd.Series(np.random.rand(10,))
        series2 = pd.Series(np.random.rand(10,))

        mseries1 = series_from_pandas(series1, chunk_size=3)
        mseries2 = series_from_pandas(series2, chunk_size=3)

        aseries = mseries1.append(mseries2)
        expected = pd.concat([series1, series2])
        result = self.executor.execute_dataframe(aseries, concat=True)[0]
        pd.testing.assert_series_equal(expected, result)

        aseries = mseries1.append(mseries2, ignore_index=True)
        expected = pd.concat([series1, series2], ignore_index=True)
        result = executor.execute_dataframe(aseries, concat=True)[0]
        pd.testing.assert_series_equal(expected, result)

        # Series with different chunk sizes.
        mseries1 = series_from_pandas(series1, chunk_size=3)
        mseries2 = series_from_pandas(series2, chunk_size=2)

        aseries = mseries1.append(mseries2)
        expected = pd.concat([series1, series2])
        result = self.executor.execute_dataframe(aseries, concat=True)[0]
        pd.testing.assert_series_equal(expected, result)

        aseries = mseries1.append(mseries2, ignore_index=True)
        expected = pd.concat([series1, series2], ignore_index=True)
        result = executor.execute_dataframe(aseries, concat=True)[0]
        pd.testing.assert_series_equal(expected, result)

        # Appending a list of series.
        series3 = pd.Series(np.random.rand(4,))
        mseries3 = series_from_pandas(series3, chunk_size=2)
        expected = pd.concat([series1, series2, series3])
        aseries = mseries1.append([mseries2, mseries3])
        result = self.executor.execute_dataframe(aseries, concat=True)[0]
        pd.testing.assert_series_equal(expected, result)
コード例 #4
0
ファイル: test_merge_execution.py プロジェクト: qinxuye/mars
def test_concat(setup):
    """Execute ``concat`` on chunked frames/series and compare with ``pd.concat``.

    Scenarios: row-wise and column-wise concatenation, ``ignore_index=True``,
    three inputs, ``join='inner'`` on frames with different columns, series
    inputs, and mixed frame/series inputs on both axes.

    ``setup`` is a fixture supplied by the test suite; it is not referenced
    in the body.
    """
    frame_a = pd.DataFrame(np.random.rand(10, 4), columns=list('ABCD'))
    frame_b = pd.DataFrame(np.random.rand(10, 4), columns=list('ABCD'))

    # Frames split with the same chunk size.
    tiled_a = from_pandas(frame_a, chunk_size=3)
    tiled_b = from_pandas(frame_b, chunk_size=3)

    merged = concat([tiled_a, tiled_b])
    want = pd.concat([frame_a, frame_b])
    got = merged.execute().fetch()
    pd.testing.assert_frame_equal(want, got)

    # Differing chunk sizes combined with ignore_index=True.
    tiled_a = from_pandas(frame_a, chunk_size=2)
    tiled_b = from_pandas(frame_b, chunk_size=3)

    merged = concat([tiled_a, tiled_b], ignore_index=True)
    want = pd.concat([frame_a, frame_b], ignore_index=True)
    got = merged.execute(extra_config={'check_index_value': False}).fetch()
    pd.testing.assert_frame_equal(want, got)

    # Column-wise concatenation (axis=1).
    tiled_a = from_pandas(frame_a, chunk_size=2)
    tiled_b = from_pandas(frame_b, chunk_size=3)

    merged = concat([tiled_a, tiled_b], axis=1)
    want = pd.concat([frame_a, frame_b], axis=1)
    got = merged.execute().fetch()
    pd.testing.assert_frame_equal(want, got)

    # More than two frames at once.
    merged = concat([tiled_a, tiled_b, tiled_a])
    want = pd.concat([frame_a, frame_b, frame_a])
    got = merged.execute().fetch()
    pd.testing.assert_frame_equal(want, got)

    # From here on the second frame has one column fewer than the first.
    frame_a = pd.DataFrame(np.random.rand(10, 4), columns=list('ABCD'))
    frame_b = pd.DataFrame(np.random.rand(10, 3), columns=list('ABC'))

    tiled_a = from_pandas(frame_a, chunk_size=3)
    tiled_b = from_pandas(frame_b, chunk_size=3)

    # Inner join on the columns.
    merged = concat([tiled_a, tiled_b], join='inner')
    want = pd.concat([frame_a, frame_b], join='inner')
    got = merged.execute().fetch()
    pd.testing.assert_frame_equal(want, got)

    # Series inputs.
    ser_a = pd.Series(np.random.rand(10, ))
    ser_b = pd.Series(np.random.rand(10, ))

    tiled_sa = series_from_pandas(ser_a, chunk_size=3)
    tiled_sb = series_from_pandas(ser_b, chunk_size=3)

    merged = concat([tiled_sa, tiled_sb])
    want = pd.concat([ser_a, ser_b])
    got = merged.execute().fetch()
    pd.testing.assert_series_equal(got, want)

    # Series with differing chunk sizes and ignore_index=True.
    tiled_sa = series_from_pandas(ser_a, chunk_size=4)
    tiled_sb = series_from_pandas(ser_b, chunk_size=3)

    merged = concat([tiled_sa, tiled_sb], ignore_index=True)
    want = pd.concat([ser_a, ser_b], ignore_index=True)
    got = merged.execute(extra_config={'check_index_value': False}).fetch()
    pd.testing.assert_series_equal(got, want)

    # Series concatenated column-wise; the comparison is against a frame.
    tiled_sa = series_from_pandas(ser_a, chunk_size=3)
    tiled_sb = series_from_pandas(ser_b, chunk_size=3)

    merged = concat([tiled_sa, tiled_sb], axis=1)
    want = pd.concat([ser_a, ser_b], axis=1)
    got = merged.execute(extra_config={'check_shape': False}).fetch()
    pd.testing.assert_frame_equal(got, want)

    # Frame followed by series, row-wise.
    merged = concat([tiled_a, tiled_sb], ignore_index=True)
    want = pd.concat([frame_a, ser_b], ignore_index=True)
    got = merged.execute(extra_config={'check_index_value': False}).fetch()
    pd.testing.assert_frame_equal(got, want)

    # Series followed by frame, row-wise.
    merged = concat([tiled_sa, tiled_b], ignore_index=True)
    want = pd.concat([ser_a, frame_b], ignore_index=True)
    got = merged.execute(extra_config={'check_index_value': False}).fetch()
    pd.testing.assert_frame_equal(got, want)

    # Frame followed by series, column-wise.
    merged = concat([tiled_a, tiled_sb], axis=1)
    want = pd.concat([frame_a, ser_b], axis=1)
    got = merged.execute().fetch()
    pd.testing.assert_frame_equal(got, want)

    # Series followed by frame, column-wise.
    merged = concat([tiled_sa, tiled_b], axis=1)
    want = pd.concat([ser_a, frame_b], axis=1)
    got = merged.execute().fetch()
    pd.testing.assert_frame_equal(got, want)
コード例 #5
0
ファイル: test_merge_execution.py プロジェクト: qinxuye/mars
def test_append_execution(setup):
    """Execute ``append`` on chunked frames/series and compare with pandas.

    Covers equal and differing chunk sizes, ``ignore_index=True``, appending
    a list of objects, and appending a plain dict as a row.

    The reference values are built with ``pd.concat`` instead of pandas' own
    ``DataFrame.append`` / ``Series.append``: those methods were deprecated
    in pandas 1.4 and removed in pandas 2.0, and ``pd.concat`` is their
    documented replacement.  Only the expected side changes; the ``append``
    under test is still the chunked objects' method.

    ``setup`` is a fixture supplied by the test suite; it is not referenced
    in the body.
    """
    df1 = pd.DataFrame(np.random.rand(10, 4), columns=list('ABCD'))
    df2 = pd.DataFrame(np.random.rand(10, 4), columns=list('ABCD'))

    # Same chunk size on both sides.
    mdf1 = from_pandas(df1, chunk_size=3)
    mdf2 = from_pandas(df2, chunk_size=3)

    adf = mdf1.append(mdf2)
    expected = pd.concat([df1, df2])
    result = adf.execute().fetch()
    pd.testing.assert_frame_equal(expected, result)

    adf = mdf1.append(mdf2, ignore_index=True)
    expected = pd.concat([df1, df2], ignore_index=True)
    result = adf.execute(extra_config={'check_index_value': False}).fetch()
    pd.testing.assert_frame_equal(expected, result)

    # Different chunk sizes.
    mdf1 = from_pandas(df1, chunk_size=3)
    mdf2 = from_pandas(df2, chunk_size=2)

    adf = mdf1.append(mdf2)
    expected = pd.concat([df1, df2])
    result = adf.execute().fetch()
    pd.testing.assert_frame_equal(expected, result)

    adf = mdf1.append(mdf2, ignore_index=True)
    expected = pd.concat([df1, df2], ignore_index=True)
    result = adf.execute(extra_config={'check_index_value': False}).fetch()
    pd.testing.assert_frame_equal(expected, result)

    # Appending a list of frames.
    df3 = pd.DataFrame(np.random.rand(8, 4), columns=list('ABCD'))
    mdf3 = from_pandas(df3, chunk_size=3)
    expected = pd.concat([df1, df2, df3])
    adf = mdf1.append([mdf2, mdf3])
    result = adf.execute().fetch()
    pd.testing.assert_frame_equal(expected, result)

    # Appending a dict as a single row; the pandas reference wraps the dict
    # in a one-row DataFrame before concatenating.
    adf = mdf1.append(dict(A=1, B=2, C=3, D=4), ignore_index=True)
    expected = pd.concat([df1, pd.DataFrame([dict(A=1, B=2, C=3, D=4)])],
                         ignore_index=True)
    result = adf.execute(extra_config={'check_index_value': False}).fetch()
    pd.testing.assert_frame_equal(expected, result)

    # test for series
    series1 = pd.Series(np.random.rand(10, ))
    series2 = pd.Series(np.random.rand(10, ))

    mseries1 = series_from_pandas(series1, chunk_size=3)
    mseries2 = series_from_pandas(series2, chunk_size=3)

    aseries = mseries1.append(mseries2)
    expected = pd.concat([series1, series2])
    result = aseries.execute().fetch()
    pd.testing.assert_series_equal(expected, result)

    aseries = mseries1.append(mseries2, ignore_index=True)
    expected = pd.concat([series1, series2], ignore_index=True)
    result = aseries.execute(extra_config={'check_index_value': False}).fetch()
    pd.testing.assert_series_equal(expected, result)

    # Series with different chunk sizes.
    mseries1 = series_from_pandas(series1, chunk_size=3)
    mseries2 = series_from_pandas(series2, chunk_size=2)

    aseries = mseries1.append(mseries2)
    expected = pd.concat([series1, series2])
    result = aseries.execute().fetch()
    pd.testing.assert_series_equal(expected, result)

    aseries = mseries1.append(mseries2, ignore_index=True)
    expected = pd.concat([series1, series2], ignore_index=True)
    result = aseries.execute(extra_config={'check_index_value': False}).fetch()
    pd.testing.assert_series_equal(expected, result)

    # Appending a list of series.
    series3 = pd.Series(np.random.rand(4, ))
    mseries3 = series_from_pandas(series3, chunk_size=2)
    expected = pd.concat([series1, series2, series3])
    aseries = mseries1.append([mseries2, mseries3])
    result = aseries.execute().fetch()
    pd.testing.assert_series_equal(expected, result)
コード例 #6
0
    def testConcat(self):
        """Execute ``concat`` on chunked frames/series and compare with ``pd.concat``.

        Scenarios: row-wise and column-wise concatenation,
        ``ignore_index=True``, three inputs, ``join='inner'`` on frames with
        different columns, series inputs, and mixed frame/series inputs on
        both axes.  Some cases run on ``self.executor`` and others on a
        locally built executor backed by a new session's context.
        """
        ctx_executor = ExecutorForTest(storage=new_session().context)

        frame_a = pd.DataFrame(np.random.rand(10, 4), columns=list('ABCD'))
        frame_b = pd.DataFrame(np.random.rand(10, 4), columns=list('ABCD'))

        # Frames split with the same chunk size.
        tiled_a = from_pandas(frame_a, chunk_size=3)
        tiled_b = from_pandas(frame_b, chunk_size=3)

        merged = concat([tiled_a, tiled_b])
        want = pd.concat([frame_a, frame_b])
        got = self.executor.execute_dataframe(merged, concat=True)[0]
        pd.testing.assert_frame_equal(want, got)

        # Differing chunk sizes combined with ignore_index=True.
        tiled_a = from_pandas(frame_a, chunk_size=2)
        tiled_b = from_pandas(frame_b, chunk_size=3)

        merged = concat([tiled_a, tiled_b], ignore_index=True)
        want = pd.concat([frame_a, frame_b], ignore_index=True)
        got = ctx_executor.execute_dataframe(merged, concat=True)[0]
        pd.testing.assert_frame_equal(want, got)

        # Column-wise concatenation (axis=1).
        tiled_a = from_pandas(frame_a, chunk_size=2)
        tiled_b = from_pandas(frame_b, chunk_size=3)

        merged = concat([tiled_a, tiled_b], axis=1)
        want = pd.concat([frame_a, frame_b], axis=1)
        got = self.executor.execute_dataframe(merged, concat=True)[0]
        pd.testing.assert_frame_equal(want, got)

        # More than two frames at once.
        merged = concat([tiled_a, tiled_b, tiled_a])
        want = pd.concat([frame_a, frame_b, frame_a])
        got = self.executor.execute_dataframe(merged, concat=True)[0]
        pd.testing.assert_frame_equal(want, got)

        # From here on the second frame has one column fewer than the first.
        frame_a = pd.DataFrame(np.random.rand(10, 4), columns=list('ABCD'))
        frame_b = pd.DataFrame(np.random.rand(10, 3), columns=list('ABC'))

        tiled_a = from_pandas(frame_a, chunk_size=3)
        tiled_b = from_pandas(frame_b, chunk_size=3)

        # Inner join on the columns.
        merged = concat([tiled_a, tiled_b], join='inner')
        want = pd.concat([frame_a, frame_b], join='inner')
        got = self.executor.execute_dataframe(merged, concat=True)[0]
        pd.testing.assert_frame_equal(want, got)

        # Series inputs.
        ser_a = pd.Series(np.random.rand(10, ))
        ser_b = pd.Series(np.random.rand(10, ))

        tiled_sa = series_from_pandas(ser_a, chunk_size=3)
        tiled_sb = series_from_pandas(ser_b, chunk_size=3)

        merged = concat([tiled_sa, tiled_sb])
        want = pd.concat([ser_a, ser_b])
        got = self.executor.execute_dataframe(merged, concat=True)[0]
        pd.testing.assert_series_equal(got, want)

        # Series with differing chunk sizes and ignore_index=True.
        tiled_sa = series_from_pandas(ser_a, chunk_size=4)
        tiled_sb = series_from_pandas(ser_b, chunk_size=3)

        merged = concat([tiled_sa, tiled_sb], ignore_index=True)
        want = pd.concat([ser_a, ser_b], ignore_index=True)
        got = ctx_executor.execute_dataframe(merged, concat=True)[0]
        pd.testing.assert_series_equal(got, want)

        # Series concatenated column-wise; the comparison is against a frame.
        tiled_sa = series_from_pandas(ser_a, chunk_size=3)
        tiled_sb = series_from_pandas(ser_b, chunk_size=3)

        merged = concat([tiled_sa, tiled_sb], axis=1)
        want = pd.concat([ser_a, ser_b], axis=1)
        got = self.executor.execute_dataframe(merged, concat=True)[0]
        pd.testing.assert_frame_equal(got, want)

        # Frame followed by series, row-wise.
        merged = concat([tiled_a, tiled_sb], ignore_index=True)
        want = pd.concat([frame_a, ser_b], ignore_index=True)
        got = ctx_executor.execute_dataframe(merged, concat=True)[0]
        pd.testing.assert_frame_equal(got, want)

        # Series followed by frame, row-wise.
        merged = concat([tiled_sa, tiled_b], ignore_index=True)
        want = pd.concat([ser_a, frame_b], ignore_index=True)
        got = ctx_executor.execute_dataframe(merged, concat=True)[0]
        pd.testing.assert_frame_equal(got, want)

        # Frame followed by series, column-wise.
        merged = concat([tiled_a, tiled_sb], axis=1)
        want = pd.concat([frame_a, ser_b], axis=1)
        got = ctx_executor.execute_dataframe(merged, concat=True)[0]
        pd.testing.assert_frame_equal(got, want)

        # Series followed by frame, column-wise.
        merged = concat([tiled_sa, tiled_b], axis=1)
        want = pd.concat([ser_a, frame_b], axis=1)
        got = ctx_executor.execute_dataframe(merged, concat=True)[0]
        pd.testing.assert_frame_equal(got, want)