Ejemplo n.º 1
0
def _shuffle_group(df, columns, stage, k, npartitions, ignore_index):
    """Split ``df`` into at most ``k`` groups for one stage of a
    staged hash shuffle.

    Rows are bucketed by hashing ``df[columns]`` (index excluded); for
    stages past the first, the bucket id is narrowed by ``k**stage`` so
    each stage refines the previous one's assignment.
    """
    buckets = hash_object_dispatch(df[columns], index=False)
    # Smallest integer dtype that can represent every partition id.
    dtype = np.min_scalar_type(npartitions * 2)
    buckets = np.mod(buckets, npartitions).astype(dtype, copy=False)
    if stage > 0:
        # Discard the low digits already consumed by earlier stages.
        np.floor_divide(buckets, k**stage, out=buckets)
    if k < int(npartitions / (k**stage)):
        # More buckets remain than this stage emits: keep only k of them.
        np.mod(buckets, k, out=buckets)
    return group_split_dispatch(
        df, buckets.astype(np.int32), k, ignore_index=ignore_index
    )
Ejemplo n.º 2
0
def _hash_series(s):
    """Row-wise Series hash.

    Dispatches on the series type: pandas series are hashed with
    ``hash_object_dispatch``; other (cudf-like) series use their own
    ``hash_values`` method, applied to the list leaves for list dtypes.
    """
    if not isinstance(s, pd.Series):
        # Non-pandas (GPU) path.
        if _is_list_dtype(s):
            return s.list.leaves.hash_values()
        return s.hash_values()
    # Using pandas hashing, which does not produce the same result as
    # cudf.Series.hash_values().  Do not expect hash-based data
    # transformations to be the same on CPU and GPU.  TODO: Fix this
    # (maybe use murmurhash3 manually on CPU).
    return hash_object_dispatch(s).values
Ejemplo n.º 3
0
def _shuffle_group_2(df, cols, ignore_index, nparts):
    """Hash-split ``df`` into up to ``nparts`` groups.

    Returns a ``(groups, remainder)`` pair, where ``groups`` maps group
    ids to sub-frames and ``remainder`` is an empty frame with the same
    schema as ``df``.
    """
    # Empty input: no groups, and the frame itself (already empty)
    # serves as the remainder.
    if not len(df):
        return {}, df

    key = df[cols] if cols else df
    ind = hash_object_dispatch(key, index=False)
    ind = (ind % int(nparts)).astype(np.int32)

    # Only as many output groups as the largest observed bucket id.
    n = ind.max() + 1

    groups = group_split_dispatch(
        df, ind.values, n, ignore_index=ignore_index
    )
    return groups, df.iloc[:0]
Ejemplo n.º 4
0
def _shuffle_group(df, columns, stage, k, npartitions, ignore_index, nfinal):
    """Split ``df`` into at most ``k`` groups for one stage of a
    staged hash shuffle.

    Like the plain staged shuffle, but when ``nfinal`` differs from
    ``npartitions`` the hash is first reduced modulo ``nfinal`` so the
    staged assignment starts from the final output mapping.
    """
    hashed = hash_object_dispatch(df[columns], index=False)
    if nfinal and nfinal != npartitions:
        # Start from the final output mapping here.
        hashed = hashed % int(nfinal)

    buckets = hashed.values
    # Smallest integer dtype that can represent every partition id.
    dtype = np.min_scalar_type(npartitions * 2)
    buckets = np.mod(buckets, npartitions).astype(dtype, copy=False)
    if stage > 0:
        # Discard the low digits already consumed by earlier stages.
        np.floor_divide(buckets, k**stage, out=buckets)
    if k < int(npartitions / (k**stage)):
        # More buckets remain than this stage emits: keep only k of them.
        np.mod(buckets, k, out=buckets)
    return group_split_dispatch(
        df, buckets.astype(np.int32), k, ignore_index=ignore_index
    )
Ejemplo n.º 5
0
def set_partitions_hash(df, columns, npartitions):
    """Return the target partition id for each row of ``df``.

    Rows are assigned by hashing ``df[columns]`` (index excluded) and
    taking the result modulo ``npartitions``.
    """
    hashed = hash_object_dispatch(df[columns], index=False)
    return np.mod(hashed, npartitions)