Beispiel #1
0
def test_melt(nulls, num_id_vars, num_value_vars, num_rows, dtype):
    """Check that cuDF's melt matches ``pd.melt`` on randomly generated data.

    A pandas frame is built with ``num_id_vars`` columns named ``id<i>`` and
    ``num_value_vars`` columns named ``val<i>``, each holding ``num_rows``
    random integers drawn from [0, 26) and cast to ``dtype``. The frame is
    copied to cuDF, and both the ``cudf_melt`` function and the
    ``DataFrame.melt`` method are compared against the pandas result.

    ``nulls`` controls NaN injection per column: ``"some"`` overwrites half
    of the rows (rounded down) at random positions, ``"all"`` overwrites
    every row, and any other value leaves the data untouched.
    """
    if dtype not in ["float32", "float64"] and nulls in ["some", "all"]:
        # The reason is passed positionally: the ``msg=`` keyword was
        # deprecated in pytest 7 and removed in pytest 8, while the first
        # positional argument is accepted by every pytest version.
        pytest.skip("nulls not supported in dtype: " + dtype)

    def make_column():
        # Draw the values first and the null positions second so the
        # sequence of RNG calls is the same for every generated column.
        column = np.random.randint(0, 26, num_rows).astype(dtype)
        if nulls == "some":
            null_idx = np.random.choice(
                num_rows, size=num_rows // 2, replace=False
            )
            column[null_idx] = np.nan
        elif nulls == "all":
            column[:] = np.nan
        return column

    pdf = pd.DataFrame()

    # Identifier columns are created before value columns, so the frame's
    # column order is id0..idN followed by val0..valM.
    id_vars = ["id" + str(i) for i in range(num_id_vars)]
    for colname in id_vars:
        pdf[colname] = make_column()

    value_vars = ["val" + str(i) for i in range(num_value_vars)]
    for colname in value_vars:
        pdf[colname] = make_column()

    gdf = cudf.from_pandas(pdf)

    # Exercise both entry points: the free function and the method.
    got = cudf_melt(frame=gdf, id_vars=id_vars, value_vars=value_vars)
    got_from_melt_method = gdf.melt(id_vars=id_vars, value_vars=value_vars)

    expect = pd.melt(frame=pdf, id_vars=id_vars, value_vars=value_vars)
    # pandas' melt makes the 'variable' column of 'object' type (string)
    # cuDF's melt makes it Categorical because it doesn't support strings
    expect["variable"] = expect["variable"].astype("category")

    assert_eq(expect, got)

    assert_eq(expect, got_from_melt_method)
def test_melt(nulls, num_id_vars, num_value_vars, num_rows, dtype):
    """Check that cuDF's melt matches ``pd.melt`` on randomly generated data.

    A pandas frame is built with ``num_id_vars`` columns named ``id<i>`` and
    ``num_value_vars`` columns named ``val<i>``, each holding ``num_rows``
    random integers drawn from [0, 26) and cast to ``dtype``. The frame is
    converted with ``DataFrame.from_pandas``, and both the ``cudf_melt``
    function and the ``melt`` method are converted back to pandas and
    compared against the pandas result with ``assert_frame_equal``.

    ``nulls`` controls NaN injection per column: ``'some'`` overwrites half
    of the rows (rounded down) at random positions, ``'all'`` overwrites
    every row, and any other value leaves the data untouched.
    """
    if dtype not in ['float32', 'float64'] and nulls in ['some', 'all']:
        # The reason is passed positionally: the ``msg=`` keyword was
        # deprecated in pytest 7 and removed in pytest 8, while the first
        # positional argument is accepted by every pytest version.
        pytest.skip('nulls not supported in dtype: ' + dtype)

    def make_column():
        # Draw the values first and the null positions second so the
        # sequence of RNG calls is the same for every generated column.
        column = np.random.randint(0, 26, num_rows).astype(dtype)
        if nulls == 'some':
            null_idx = np.random.choice(num_rows,
                                        size=num_rows // 2,
                                        replace=False)
            column[null_idx] = np.nan
        elif nulls == 'all':
            column[:] = np.nan
        return column

    pdf = pd.DataFrame()

    # Identifier columns are created before value columns, so the frame's
    # column order is id0..idN followed by val0..valM.
    id_vars = ['id' + str(i) for i in range(num_id_vars)]
    for colname in id_vars:
        pdf[colname] = make_column()

    value_vars = ['val' + str(i) for i in range(num_value_vars)]
    for colname in value_vars:
        pdf[colname] = make_column()

    gdf = DataFrame.from_pandas(pdf)

    # Exercise both entry points: the free function and the method.
    got = cudf_melt(frame=gdf, id_vars=id_vars, value_vars=value_vars)
    got_from_melt_method = gdf.melt(id_vars=id_vars, value_vars=value_vars)

    expect = pd.melt(frame=pdf, id_vars=id_vars, value_vars=value_vars)
    # pandas' melt makes the 'variable' column of 'object' type (string)
    # cuDF's melt makes it Categorical because it doesn't support strings
    expect['variable'] = expect['variable'].astype('category')

    pd.testing.assert_frame_equal(expect, got.to_pandas())

    pd.testing.assert_frame_equal(expect, got_from_melt_method.to_pandas())