def test_df_apply_custom_chunks(nelem):
    """Check ``DataFrame.apply_chunks`` with an explicit list of chunk offsets.

    Three identical ``arange(nelem)`` columns are fed through a kernel that
    writes ``extra2 * in1 - extra1 * in2 + in3`` to ``out1`` and the loop
    index to ``out2``.  The expectation for ``out2`` treats every entry of
    ``chunks`` as the start of a chunk that runs up to the next entry (or to
    the end of the frame for the last one), with the index restarting at 0
    in each chunk.

    Parameters
    ----------
    nelem
        Number of rows in the input frame; chunk offsets that are not
        smaller than ``nelem`` are dropped before the call.
    """

    # Kernel applied to each chunk; parameter names match the ``incols``,
    # ``outcols`` and ``kwargs`` names passed to ``apply_chunks`` below.
    def kernel(in1, in2, in3, out1, out2, extra1, extra2):
        for i, (x, y, z) in enumerate(zip(in1, in2, in3)):
            out1[i] = extra2 * x - extra1 * y + z
            out2[i] = i

    df = DataFrame()
    df["in1"] = in1 = np.arange(nelem)
    df["in2"] = in2 = np.arange(nelem)
    df["in3"] = in3 = np.arange(nelem)

    # Keep only the chunk offsets that fall inside the frame.
    chunks = [0, 7, 11, 29, 101, 777]
    chunks = [c for c in chunks if c < nelem]

    extra1 = 2.3
    extra2 = 3.4

    expect_out1 = extra2 * in1 - extra1 * in2 + in3
    # np.hstack needs a real sequence: handing it a generator has been
    # deprecated since NumPy 1.16 and is rejected by newer releases, so the
    # per-chunk index ranges are materialised in a list first.
    expect_out2 = np.hstack(
        [
            np.arange(end - start)
            for start, end in zip(chunks, chunks[1:] + [len(df)])
        ]
    )

    outdf = df.apply_chunks(
        kernel,
        incols=["in1", "in2", "in3"],
        outcols=dict(out1=np.float64, out2=np.int32),
        kwargs=dict(extra1=extra1, extra2=extra2),
        chunks=chunks,
    )

    got_out1 = outdf["out1"]
    got_out2 = outdf["out2"]

    np.testing.assert_array_almost_equal(got_out1.to_array(), expect_out1)
    np.testing.assert_array_almost_equal(got_out2.to_array(), expect_out2)
def test_df_apply_chunks_incols_mapping(nelem, chunksize):
    """Check ``DataFrame.apply_chunks`` when ``incols`` is a name mapping.

    The frame columns ``in1``/``in2``/``in3`` are passed to the kernel under
    the argument names ``q``/``p``/``r``.  ``out1`` is expected to equal
    ``extra2 * in1 - extra1 * in2 + in3`` and ``out2`` is expected to equal
    the row number modulo ``chunksize``.
    """

    # Kernel arguments q, p, r are the names on the right-hand side of the
    # ``incols`` mapping given to ``apply_chunks``.
    def kernel(q, p, r, out1, out2, extra1, extra2):
        for pos, (q_val, p_val, r_val) in enumerate(zip(q, p, r)):
            out1[pos] = extra2 * q_val - extra1 * p_val + r_val
            out2[pos] = pos

    in1 = np.arange(nelem)
    in2 = np.arange(nelem)
    in3 = np.arange(nelem)

    df = DataFrame()
    df["in1"] = in1
    df["in2"] = in2
    df["in3"] = in3

    extra1 = 2.3
    extra2 = 3.4

    # Reference result computed on the host with NumPy.
    expected_out = DataFrame()
    expected_out["out1"] = extra2 * in1 - extra1 * in2 + in3
    expected_out["out2"] = np.arange(len(df)) % chunksize

    column_to_arg = {"in1": "q", "in2": "p", "in3": "r"}
    out_dtypes = {"out1": np.float64, "out2": np.int64}
    kernel_kwargs = {"extra1": extra1, "extra2": extra2}

    outdf = df.apply_chunks(
        kernel,
        incols=column_to_arg,
        outcols=out_dtypes,
        kwargs=kernel_kwargs,
        chunks=chunksize,
    )

    assert_eq(outdf[["out1", "out2"]], expected_out)
def test_df_apply_chunks(nelem, chunksize):
    """Check ``DataFrame.apply_chunks`` with an integer ``chunks`` argument.

    ``out1`` is expected to equal ``extra2 * in1 - extra1 * in2 + in3`` and
    ``out2`` is expected to equal the row number modulo ``chunksize``.
    """

    # Kernel parameter names match the ``incols``, ``outcols`` and ``kwargs``
    # names passed to ``apply_chunks`` below.
    def kernel(in1, in2, in3, out1, out2, extra1, extra2):
        for pos, (a, b, c) in enumerate(zip(in1, in2, in3)):
            out1[pos] = extra2 * a - extra1 * b + c
            out2[pos] = pos

    in1 = np.arange(nelem)
    in2 = np.arange(nelem)
    in3 = np.arange(nelem)

    df = DataFrame()
    df["in1"] = in1
    df["in2"] = in2
    df["in3"] = in3

    extra1 = 2.3
    extra2 = 3.4

    # Reference results computed on the host with NumPy.
    expect_out1 = extra2 * in1 - extra1 * in2 + in3
    expect_out2 = np.arange(len(df)) % chunksize

    out_dtypes = {"out1": np.float64, "out2": np.int32}
    kernel_kwargs = {"extra1": extra1, "extra2": extra2}

    outdf = df.apply_chunks(
        kernel,
        incols=["in1", "in2", "in3"],
        outcols=out_dtypes,
        kwargs=kernel_kwargs,
        chunks=chunksize,
    )

    result_out1 = outdf["out1"]
    result_out2 = outdf["out2"]

    np.testing.assert_array_almost_equal(result_out1.to_array(), expect_out1)
    np.testing.assert_array_almost_equal(result_out2.to_array(), expect_out2)
def test_df_apply_custom_chunks_blkct_tpb(nelem, blkct, tpb):
    """Check ``DataFrame.apply_chunks`` with explicit ``blkct`` and ``tpb``.

    The kernel writes ``extra2 * in1 - extra1 * in2 + in3`` to ``out1`` and
    ``i * cuda.blockDim.x`` to ``out2``.  The expectation for ``out2`` is
    ``tpb`` times the index within each chunk, i.e. the test expects
    ``cuda.blockDim.x`` to equal ``tpb`` inside the kernel.  Every entry of
    ``chunks`` is treated as the start of a chunk that runs up to the next
    entry (or to the end of the frame for the last one).

    Parameters
    ----------
    nelem
        Number of rows in the input frame; chunk offsets that are not
        smaller than ``nelem`` are dropped before the call.
    blkct
        Forwarded to ``apply_chunks`` as ``blkct``.
    tpb
        Forwarded to ``apply_chunks`` as ``tpb``; also used as the scale
        factor in the expected ``out2`` values.
    """

    # Each thread starts at its own threadIdx.x and steps by blockDim.x
    # through the chunk it was handed.
    def kernel(in1, in2, in3, out1, out2, extra1, extra2):
        for i in range(cuda.threadIdx.x, in1.size, cuda.blockDim.x):
            x = in1[i]
            y = in2[i]
            z = in3[i]
            out1[i] = extra2 * x - extra1 * y + z
            out2[i] = i * cuda.blockDim.x

    df = DataFrame()
    df["in1"] = in1 = np.arange(nelem)
    df["in2"] = in2 = np.arange(nelem)
    df["in3"] = in3 = np.arange(nelem)

    # Keep only the chunk offsets that fall inside the frame.
    chunks = [0, 7, 11, 29, 101, 777]
    chunks = [c for c in chunks if c < nelem]

    extra1 = 2.3
    extra2 = 3.4

    expect_out1 = extra2 * in1 - extra1 * in2 + in3
    # np.hstack needs a real sequence: handing it a generator has been
    # deprecated since NumPy 1.16 and is rejected by newer releases, so the
    # per-chunk expected values are materialised in a list first.
    expect_out2 = np.hstack(
        [
            tpb * np.arange(end - start)
            for start, end in zip(chunks, chunks[1:] + [len(df)])
        ]
    )

    outdf = df.apply_chunks(
        kernel,
        incols=["in1", "in2", "in3"],
        outcols=dict(out1=np.float64, out2=np.int32),
        kwargs=dict(extra1=extra1, extra2=extra2),
        chunks=chunks,
        blkct=blkct,
        tpb=tpb,
    )

    got_out1 = outdf["out1"]
    got_out2 = outdf["out2"]

    np.testing.assert_array_almost_equal(got_out1.to_array(), expect_out1)
    np.testing.assert_array_almost_equal(got_out2.to_array(), expect_out2)