Example #1
0
def test_make_meta_backends(index):
    """Check that empty and non-empty metadata keep the frame's dtypes.

    A three-row cudf DataFrame is built with integer, float, string,
    categorical and datetime columns, and ``index`` is passed to
    ``set_index``. The dtypes of ``make_meta`` and ``meta_nonempty`` output
    are compared against the frame. Unless the resulting index is a
    ``cudf.MultiIndex``, the same comparisons are repeated on the
    single-partition collection returned by ``dgd.from_cudf``.
    """
    numeric_columns = {
        name: np.arange(0, 3, dtype=name)
        for name in ("int8", "int32", "int64", "float64")
    }
    frame = cudf.DataFrame(numeric_columns)

    frame["strings"] = ["cat", "dog", "fish"]
    frame["cats"] = frame["strings"].astype("category")

    days = ["2018-10-07", "2018-10-08", "2018-10-09"]
    frame["time_s"] = np.array(days, dtype="datetime64[s]")
    # The finer-resolution columns are derived from the seconds column.
    for unit in ("ms", "ns"):
        frame[f"time_{unit}"] = frame["time_s"].astype(f"datetime64[{unit}]")

    frame = frame.set_index(index)

    # "Empty" metadata must carry the same dtypes as the data itself.
    empty_meta = make_meta(frame)
    dd.assert_eq(empty_meta.dtypes, frame.dtypes)

    # "Non-empty" metadata must agree with the empty metadata.
    nonempty_meta = meta_nonempty(frame)
    dd.assert_eq(empty_meta.dtypes, nonempty_meta.dtypes)

    # The dask code path is only exercised when the index is not a MultiIndex.
    if isinstance(frame.index, cudf.MultiIndex):
        return

    collection = dgd.from_cudf(frame, npartitions=1)

    # "Empty" metadata attached to the collection.
    dd.assert_eq(collection._meta.dtypes, frame.dtypes)

    # "Non-empty" metadata attached to the collection.
    dd.assert_eq(collection._meta.dtypes, collection._meta_nonempty.dtypes)
Example #2
0
def _nonempty_index(idx):
    """Build a two-element index of the same kind as ``idx``.

    The index type is checked in a fixed order and the first match wins:
    RangeIndex, DatetimeIndex, StringIndex, CategoricalIndex, GenericIndex,
    then MultiIndex. The name (or names, for a MultiIndex) of ``idx`` is
    carried over to the result.

    Raises:
        TypeError: if ``idx`` matches none of the handled index types.
    """
    # NOTE(review): GenericIndex is tested after the more specific classes,
    # presumably because they derive from it -- keep this order.
    index_mod = cudf.core.index

    if isinstance(idx, index_mod.RangeIndex):
        return index_mod.RangeIndex(2, name=idx.name)

    if isinstance(idx, index_mod.DatetimeIndex):
        first_day = "1970-01-01"
        stamps = np.array([first_day, "1970-01-02"], dtype=idx.dtype)
        return index_mod.DatetimeIndex(
            cudf.core.column.as_column(stamps), name=idx.name)

    if isinstance(idx, index_mod.StringIndex):
        return index_mod.StringIndex(["cat", "dog"], name=idx.name)

    if isinstance(idx, index_mod.CategoricalIndex):
        column_keys = tuple(idx._data.keys())
        assert len(column_keys) == 1
        source_column = idx._data[column_keys[0]]
        # Both placeholder entries use code 0 and reuse the source
        # column's categories and ordered flag.
        categorical = cudf.core.column.build_categorical_column(
            categories=source_column.categories,
            codes=[0, 0],
            ordered=source_column.ordered,
        )
        return index_mod.CategoricalIndex(categorical, name=idx.name)

    if isinstance(idx, index_mod.GenericIndex):
        placeholder = np.arange(2, dtype=idx.dtype)
        return index_mod.GenericIndex(placeholder, name=idx.name)

    if isinstance(idx, cudf.core.MultiIndex):
        # Each level gets its own non-empty metadata; every level's codes
        # are [0, 0].
        nonempty_levels = [meta_nonempty(level) for level in idx.levels]
        level_codes = [[0, 0] for _ in idx.levels]
        return cudf.core.MultiIndex(
            levels=nonempty_levels, codes=level_codes, names=idx.names)

    raise TypeError(f"Don't know how to handle index of type {type(idx)}")
Example #3
0
def meta_nonempty_cudf(x):
    """Return a non-empty cudf DataFrame with the same column names as ``x``.

    The result's index comes from ``meta_nonempty(x.index)``. For each
    distinct dtype string, one column is generated with
    ``_get_non_empty_data`` and then shared by every column of ``x`` that
    has that dtype string.
    """
    result = cudf.DataFrame(index=meta_nonempty(x.index))

    # Maps str(dtype) to the column generated for the first such dtype seen.
    column_by_dtype = {}
    for name in x._data.names:
        dtype_key = str(x._data[name].dtype)
        if dtype_key in column_by_dtype:
            shared_column = column_by_dtype[dtype_key]
        else:
            shared_column = cudf.core.column.as_column(
                _get_non_empty_data(x[name]))
            column_by_dtype[dtype_key] = shared_column
        result._data[name] = shared_column

    return result
Example #4
0
def meta_nonempty_cudf(x):
    """Return a non-empty cudf DataFrame with the same columns as ``x``.

    One series is produced with ``_nonempty_series`` for each distinct dtype
    string and reused for every column with that dtype string. The frame is
    first assembled under positional integer keys, then the column labels of
    ``x`` are assigned to it.
    """
    nonempty_index = meta_nonempty(x.index)

    # Maps str(dtype) to the series generated for the first such dtype seen.
    series_by_dtype = {}
    positional_data = {}
    for position, label in enumerate(x.columns):
        column = x[label]
        dtype_key = str(column.dtype)
        if dtype_key in series_by_dtype:
            shared_series = series_by_dtype[dtype_key]
        else:
            shared_series = _nonempty_series(column, idx=nonempty_index)
            series_by_dtype[dtype_key] = shared_series
        positional_data[position] = shared_series

    positions = np.arange(len(x.columns))
    result = cudf.DataFrame(
        positional_data, index=nonempty_index, columns=positions)
    # Swap the positional keys for the original column labels.
    result.columns = x.columns
    return result
Example #5
0
def meta_nonempty_cudf(x, index=None):
    """Produce non-empty metadata for ``x`` through a pandas round trip.

    ``x`` is converted with ``to_pandas()``, handed to ``meta_nonempty``,
    and the result is converted back with ``cudf.from_pandas``. The
    ``index`` argument is accepted but not used.
    """
    # TODO: add iloc[:5]
    pandas_frame = x.to_pandas()
    nonempty_pandas = meta_nonempty(pandas_frame)
    return cudf.from_pandas(nonempty_pandas)
Example #6
0
def _(x):
    """Produce non-empty metadata for ``x`` through a pandas round trip.

    Converts ``x`` with ``to_pandas()``, applies ``meta_nonempty`` to the
    pandas object, and returns the result of ``cudf.from_pandas`` on it.
    """
    # TODO: add iloc[:5]
    return cudf.from_pandas(meta_nonempty(x.to_pandas()))