Python DataFrame.apply_chunksの例

プログラミング言語: Python

名前空間/パッケージ名: cudf

クラス/型: DataFrame

メソッド/関数: apply_chunks

hotexamples.comのコード掲載数: 4

Python DataFrame.apply_chunks - 4件のコード例が見つかりました。すべてオープンソースプロジェクトから抽出されたPythonのcudf.DataFrame.apply_chunksの実例で、最も評価が高いものを厳選しています。コード例の評価を行っていただくことで、より質の高いコード例が表示されるようになります。

よく使われるメソッド

表示 / 非表示

DataFrame(30)

from_pandas(30)

_from_data(16)

to_pandas(14)

_from_table(10)

drop(10)

merge(7)

copy(7)

take(5)

from_gpu_matrix(5)

equals(4)

one_hot_encoding(4)

set_index(4)

apply_chunks(4)

add_column(4)

columns(3)

label_encoding(3)

name(3)

dropna(3)

query(3)

sort_values(2)

to_records(2)

_concat(2)

from_records(2)

append(2)

apply_rows(2)

_apply(1)

serialize(1)

to_parquet(1)

_apply_support_method(1)

to_dlpack(1)

to_cupy(1)

to_arrow(1)

scatter_by_map(1)

select_dtypes(1)

join(1)

repeat(1)

argsort(1)

as_gpu_matrix(1)

nsmallest(1)

nlargest(1)

drop_duplicates(1)

from_arrow(1)

memory_usage(1)

insert(1)

コード例 #1

ファイルを表示

ファイル: test_cuda_apply.py プロジェクト: TravisHester/cudf

def test_df_apply_custom_chunks(nelem):
    """Check ``DataFrame.apply_chunks`` with an explicit list of chunk offsets.

    Three identical integer columns of length ``nelem`` are fed to a kernel
    that writes a linear combination of the inputs to ``out1`` and the
    loop index to ``out2``. The expected ``out2`` restarts from zero at each
    chunk boundary, so the test expects the kernel to be invoked once per
    chunk with that chunk's slice of every column.

    Parameters
    ----------
    nelem : int
        Number of rows in the test frame (supplied by the test harness).
    """

    def kernel(in1, in2, in3, out1, out2, extra1, extra2):
        # ``i`` counts from zero within whatever slice the kernel receives.
        for i, (x, y, z) in enumerate(zip(in1, in2, in3)):
            out1[i] = extra2 * x - extra1 * y + z
            out2[i] = i

    df = DataFrame()
    df["in1"] = in1 = np.arange(nelem)
    df["in2"] = in2 = np.arange(nelem)
    df["in3"] = in3 = np.arange(nelem)

    # Candidate chunk start offsets; drop those at or beyond the frame's end.
    chunks = [0, 7, 11, 29, 101, 777]
    chunks = [c for c in chunks if c < nelem]

    extra1 = 2.3
    extra2 = 3.4

    expect_out1 = extra2 * in1 - extra1 * in2 + in3
    # Each chunk spans [start, next start); the last one ends at len(df).
    # np.hstack needs a real sequence: passing a generator has been
    # deprecated since NumPy 1.16 and is rejected by newer releases.
    expect_out2 = np.hstack(
        [np.arange(e - s) for s, e in zip(chunks, chunks[1:] + [len(df)])]
    )

    outdf = df.apply_chunks(
        kernel,
        incols=["in1", "in2", "in3"],
        outcols=dict(out1=np.float64, out2=np.int32),
        kwargs=dict(extra1=extra1, extra2=extra2),
        chunks=chunks,
    )

    got_out1 = outdf["out1"]
    got_out2 = outdf["out2"]

    np.testing.assert_array_almost_equal(got_out1.to_array(), expect_out1)
    np.testing.assert_array_almost_equal(got_out2.to_array(), expect_out2)

コード例 #2

ファイルを表示

ファイル: test_cuda_apply.py プロジェクト: TravisHester/cudf

def test_df_apply_chunks_incols_mapping(nelem, chunksize):
    """Check ``DataFrame.apply_chunks`` when ``incols`` is given as a dict.

    The frame's columns are called ``in1``/``in2``/``in3`` while the kernel
    takes ``q``/``p``/``r``; the ``incols`` dict pairs each column name with
    a kernel argument name. The expected ``out2`` is the row position modulo
    ``chunksize``, i.e. the kernel's loop index is expected to restart in
    every chunk.

    Parameters
    ----------
    nelem : int
        Number of rows in the test frame (supplied by the test harness).
    chunksize : int
        Value passed as ``chunks`` to ``apply_chunks``.
    """

    def kernel(q, p, r, out1, out2, extra1, extra2):
        # ``pos`` counts from zero within whatever slice the kernel receives.
        for pos, (qv, pv, rv) in enumerate(zip(q, p, r)):
            out1[pos] = extra2 * qv - extra1 * pv + rv
            out2[pos] = pos

    extra1, extra2 = 2.3, 3.4

    # Host-side copies of the inputs, reused to compute the expected result.
    in1 = np.arange(nelem)
    in2 = np.arange(nelem)
    in3 = np.arange(nelem)

    df = DataFrame()
    df["in1"] = in1
    df["in2"] = in2
    df["in3"] = in3

    expected_out = DataFrame()
    expected_out["out1"] = extra2 * in1 - extra1 * in2 + in3
    expected_out["out2"] = np.arange(len(df)) % chunksize

    column_to_arg = {"in1": "q", "in2": "p", "in3": "r"}
    out_dtypes = {"out1": np.float64, "out2": np.int64}
    scalars = {"extra1": extra1, "extra2": extra2}

    outdf = df.apply_chunks(
        kernel,
        incols=column_to_arg,
        outcols=out_dtypes,
        kwargs=scalars,
        chunks=chunksize,
    )

    # Compare only the produced columns against the expected frame.
    produced = outdf[["out1", "out2"]]
    assert_eq(produced, expected_out)

コード例 #3

ファイルを表示

ファイル: test_cuda_apply.py プロジェクト: TravisHester/cudf

def test_df_apply_chunks(nelem, chunksize):
    """Check ``DataFrame.apply_chunks`` with an integer ``chunks`` argument.

    The kernel writes a linear combination of the three input columns to
    ``out1`` and its loop index to ``out2``. The expected ``out2`` is the row
    position modulo ``chunksize``, i.e. the loop index is expected to restart
    in every chunk.

    Parameters
    ----------
    nelem : int
        Number of rows in the test frame (supplied by the test harness).
    chunksize : int
        Value passed as ``chunks`` to ``apply_chunks``.
    """

    def kernel(in1, in2, in3, out1, out2, extra1, extra2):
        # ``pos`` counts from zero within whatever slice the kernel receives.
        for pos, (a, b, c) in enumerate(zip(in1, in2, in3)):
            out1[pos] = extra2 * a - extra1 * b + c
            out2[pos] = pos

    extra1, extra2 = 2.3, 3.4

    # Host-side copies of the inputs, reused to compute the expected result.
    in1 = np.arange(nelem)
    in2 = np.arange(nelem)
    in3 = np.arange(nelem)

    df = DataFrame()
    df["in1"] = in1
    df["in2"] = in2
    df["in3"] = in3

    expect_out1 = extra2 * in1 - extra1 * in2 + in3
    expect_out2 = np.arange(len(df)) % chunksize

    out_dtypes = {"out1": np.float64, "out2": np.int32}
    scalars = {"extra1": extra1, "extra2": extra2}

    outdf = df.apply_chunks(
        kernel,
        incols=["in1", "in2", "in3"],
        outcols=out_dtypes,
        kwargs=scalars,
        chunks=chunksize,
    )

    # Fetch both result columns before asserting on either of them.
    got_out1, got_out2 = outdf["out1"], outdf["out2"]

    check = np.testing.assert_array_almost_equal
    check(got_out1.to_array(), expect_out1)
    check(got_out2.to_array(), expect_out2)

コード例 #4

ファイルを表示

ファイル: test_cuda_apply.py プロジェクト: TravisHester/cudf

def test_df_apply_custom_chunks_blkct_tpb(nelem, blkct, tpb):
    """Check ``DataFrame.apply_chunks`` with explicit ``blkct`` and ``tpb``.

    Uses an explicit list of chunk offsets, as in the custom-chunks test,
    but the kernel indexes by CUDA thread: each thread starts at its
    ``threadIdx.x`` and advances by ``blockDim.x``. ``out2`` stores the
    within-chunk index multiplied by ``blockDim.x``; the expected value
    uses ``tpb`` for that factor, so the test expects ``blockDim.x`` to
    equal ``tpb``.

    Parameters
    ----------
    nelem : int
        Number of rows in the test frame (supplied by the test harness).
    blkct : int
        Value forwarded as ``blkct`` to ``apply_chunks``.
    tpb : int
        Value forwarded as ``tpb`` to ``apply_chunks``.
    """

    def kernel(in1, in2, in3, out1, out2, extra1, extra2):
        # Start at this thread's index and step by the block's thread count.
        for i in range(cuda.threadIdx.x, in1.size, cuda.blockDim.x):
            x = in1[i]
            y = in2[i]
            z = in3[i]
            out1[i] = extra2 * x - extra1 * y + z
            out2[i] = i * cuda.blockDim.x

    df = DataFrame()
    df["in1"] = in1 = np.arange(nelem)
    df["in2"] = in2 = np.arange(nelem)
    df["in3"] = in3 = np.arange(nelem)

    # Candidate chunk start offsets; drop those at or beyond the frame's end.
    chunks = [0, 7, 11, 29, 101, 777]
    chunks = [c for c in chunks if c < nelem]

    extra1 = 2.3
    extra2 = 3.4

    expect_out1 = extra2 * in1 - extra1 * in2 + in3
    # Each chunk spans [start, next start); the last one ends at len(df).
    # np.hstack needs a real sequence: passing a generator has been
    # deprecated since NumPy 1.16 and is rejected by newer releases.
    expect_out2 = np.hstack(
        [
            tpb * np.arange(e - s)
            for s, e in zip(chunks, chunks[1:] + [len(df)])
        ]
    )

    outdf = df.apply_chunks(
        kernel,
        incols=["in1", "in2", "in3"],
        outcols=dict(out1=np.float64, out2=np.int32),
        kwargs=dict(extra1=extra1, extra2=extra2),
        chunks=chunks,
        blkct=blkct,
        tpb=tpb,
    )

    got_out1 = outdf["out1"]
    got_out2 = outdf["out2"]

    np.testing.assert_array_almost_equal(got_out1.to_array(), expect_out1)
    np.testing.assert_array_almost_equal(got_out2.to_array(), expect_out2)