def test_melt(nulls, num_id_vars, num_value_vars, num_rows, dtype):
    """Check that cuDF's melt agrees with ``pd.melt`` on random input.

    A pandas frame with ``num_id_vars`` columns named ``id0, id1, ...`` and
    ``num_value_vars`` columns named ``val0, val1, ...`` is filled with
    random data, converted with ``cudf.from_pandas`` and melted both through
    ``cudf_melt`` and through the ``melt`` method of the converted frame.
    Both results are compared with the pandas result.

    Parameters
    ----------
    nulls : str
        ``"some"`` overwrites ``num_rows // 2`` randomly chosen entries of
        every column with NaN, ``"all"`` overwrites every entry; any other
        value leaves the random data untouched.
    num_id_vars : int
        Number of identifier columns to generate.
    num_value_vars : int
        Number of value columns to generate.
    num_rows : int
        Number of rows in every generated column.
    dtype : str
        dtype each column is cast to. Combinations of a dtype other than
        ``"float32"``/``"float64"`` with ``nulls`` of ``"some"``/``"all"``
        are skipped.

    NOTE(review): the arguments are presumably supplied by parametrize
    decorators that are not visible from this block -- confirm.
    """
    if dtype not in ["float32", "float64"] and nulls in ["some", "all"]:
        # The reason is passed positionally: the ``msg=`` keyword has been
        # deprecated since pytest 7.0 (renamed to ``reason=``), while the
        # positional form is accepted by both old and new pytest releases.
        pytest.skip("nulls not supported in dtype: " + dtype)

    def _random_column():
        """Return random integers in [0, 26) cast to ``dtype``, with NaNs
        inserted according to ``nulls``."""
        data = np.random.randint(0, 26, num_rows).astype(dtype)
        if nulls == "some":
            # Blank out half of the rows (rounded down), chosen without
            # repetition.
            idx = np.random.choice(
                num_rows, size=num_rows // 2, replace=False
            )
            data[idx] = np.nan
        elif nulls == "all":
            data[:] = np.nan
        return data

    id_vars = ["id" + str(i) for i in range(num_id_vars)]
    value_vars = ["val" + str(i) for i in range(num_value_vars)]

    # Identifier columns are generated and inserted before value columns so
    # that both the column order and the sequence of RNG calls stay the same
    # as when each group was built by its own loop.
    pdf = pd.DataFrame()
    for colname in id_vars + value_vars:
        pdf[colname] = _random_column()

    gdf = cudf.from_pandas(pdf)

    got = cudf_melt(frame=gdf, id_vars=id_vars, value_vars=value_vars)
    got_from_melt_method = gdf.melt(id_vars=id_vars, value_vars=value_vars)

    expect = pd.melt(frame=pdf, id_vars=id_vars, value_vars=value_vars)
    # pandas' melt makes the 'variable' column of 'object' type (string)
    # cuDF's melt makes it Categorical because it doesn't support strings
    expect["variable"] = expect["variable"].astype("category")

    assert_eq(expect, got)
    assert_eq(expect, got_from_melt_method)
def test_melt(nulls, num_id_vars, num_value_vars, num_rows, dtype):
    """Check that cuDF's melt agrees with ``pd.melt`` on random input.

    A pandas frame with ``num_id_vars`` columns named ``id0, id1, ...`` and
    ``num_value_vars`` columns named ``val0, val1, ...`` is filled with
    random data, converted with ``DataFrame.from_pandas`` and melted both
    through ``cudf_melt`` and through the ``melt`` method of the converted
    frame. Each result is converted back with ``to_pandas()`` and compared
    with the pandas result using ``pd.testing.assert_frame_equal``.

    Parameters
    ----------
    nulls : str
        ``'some'`` overwrites ``num_rows // 2`` randomly chosen entries of
        every column with NaN, ``'all'`` overwrites every entry; any other
        value leaves the random data untouched.
    num_id_vars : int
        Number of identifier columns to generate.
    num_value_vars : int
        Number of value columns to generate.
    num_rows : int
        Number of rows in every generated column.
    dtype : str
        dtype each column is cast to. Combinations of a dtype other than
        ``'float32'``/``'float64'`` with ``nulls`` of ``'some'``/``'all'``
        are skipped.

    NOTE(review): the arguments are presumably supplied by parametrize
    decorators that are not visible from this block -- confirm.
    """
    if dtype not in ['float32', 'float64'] and nulls in ['some', 'all']:
        # The reason is passed positionally: the ``msg=`` keyword has been
        # deprecated since pytest 7.0 (renamed to ``reason=``), while the
        # positional form is accepted by both old and new pytest releases.
        pytest.skip('nulls not supported in dtype: ' + dtype)

    def _random_column():
        """Return random integers in [0, 26) cast to ``dtype``, with NaNs
        inserted according to ``nulls``."""
        data = np.random.randint(0, 26, num_rows).astype(dtype)
        if nulls == 'some':
            # Blank out half of the rows (rounded down), chosen without
            # repetition.
            idx = np.random.choice(
                num_rows, size=num_rows // 2, replace=False
            )
            data[idx] = np.nan
        elif nulls == 'all':
            data[:] = np.nan
        return data

    id_vars = ['id' + str(i) for i in range(num_id_vars)]
    value_vars = ['val' + str(i) for i in range(num_value_vars)]

    # Identifier columns are generated and inserted before value columns so
    # that both the column order and the sequence of RNG calls stay the same
    # as when each group was built by its own loop.
    pdf = pd.DataFrame()
    for colname in id_vars + value_vars:
        pdf[colname] = _random_column()

    gdf = DataFrame.from_pandas(pdf)

    got = cudf_melt(frame=gdf, id_vars=id_vars, value_vars=value_vars)
    got_from_melt_method = gdf.melt(id_vars=id_vars, value_vars=value_vars)

    expect = pd.melt(frame=pdf, id_vars=id_vars, value_vars=value_vars)
    # pandas' melt makes the 'variable' column of 'object' type (string)
    # cuDF's melt makes it Categorical because it doesn't support strings
    expect['variable'] = expect['variable'].astype('category')

    pd.testing.assert_frame_equal(expect, got.to_pandas())
    pd.testing.assert_frame_equal(expect, got_from_melt_method.to_pandas())