def get_unique_tokens(words, words_hash=None):
    """Return the distinct tokens in *words*.

    Duplicates are detected by hashing each token on the GPU and keeping the
    first row per hash value, so the result preserves first-occurrence order
    of the underlying rows.

    Parameters
    ----------
    words : GPU string collection exposing ``size()`` and ``sublist()``
        (nvstrings-like — TODO confirm exact type against callers).
    words_hash : optional
        Precomputed hash column for *words*; when ``None`` the hashes are
        computed via ``on_gpu(words, 'hash')``.

    Returns
    -------
    A GPU string collection containing one entry per distinct hash.
    """
    df = gd.DataFrame()
    # Hash each token so duplicates can be detected by value on the GPU.
    df['hash'] = on_gpu(words, 'hash') if words_hash is None else words_hash
    # Row index used to map surviving hashes back to token positions.
    df['ID'] = np.arange(words.size()).astype(np.int32)
    # Keep only the first occurrence of each distinct hash.
    df = drop_duplicates(df, by='hash', keep='first')
    rows = df['ID'].to_array()
    res = words.sublist(rows.tolist())
    # Release the intermediate frame (and its GPU memory) before returning.
    del df
    return res
def get_token_counts(words, words_hash=None):
    """Count occurrences of each token in *words*.

    Tokens are grouped by their GPU-computed hash; one representative token
    per hash is pulled back to the host and paired with its group count.

    NOTE(review): counting is hash-based, so distinct tokens that collide on
    the same hash would be merged into one entry — presumably acceptable for
    this pipeline; confirm upstream.

    Parameters
    ----------
    words : GPU string collection exposing ``size()``, ``sublist()`` and
        ``to_host()`` (nvstrings-like — TODO confirm exact type).
    words_hash : optional
        Precomputed hash column for *words*; when ``None`` the hashes are
        computed via ``on_gpu(words, 'hash')``.

    Returns
    -------
    collections.Counter
        Maps each representative token to its occurrence count.
    """
    df = gd.DataFrame()
    df['hash'] = on_gpu(words, 'hash') if words_hash is None else words_hash
    # Row index used to map each distinct hash back to a token position.
    df['ID'] = np.arange(words.size()).astype(np.int32)
    # Per-hash occurrence counts; the aggregated column is read back below
    # under the name 'count_hash'.
    dg = df.groupby('hash').agg({'hash': 'count'})
    # One representative row per distinct hash, keeping its first position.
    df = drop_duplicates(df, by='hash', keep='first')
    # Attach each group's count to its representative row.
    df = df.merge(dg, on=['hash'], how='left')
    rows = df['ID'].to_array()
    # Pull the representative tokens back to the host.
    res = words.sublist(rows.tolist()).to_host()
    res = dict(zip(res, df['count_hash'].to_array().tolist()))
    return Counter(res)