Python one_hot_encode 예제들

프로그래밍 언어: Python

네임스페이스/패키지 이름: sparsity.dask.reshape

메소드/함수: one_hot_encode

hotexamples.com의 예제 수: 9

Python one_hot_encode - 9개의 예제를 찾았습니다. 아래는 오픈소스 프로젝트에서 추출한 Python 함수 sparsity.dask.reshape.one_hot_encode의 실제 사용 예제 중 평점이 가장 높은 것들입니다. 예제를 평가해 주시면 예제 품질을 높이는 데 도움이 됩니다.

예제 #1

파일 보기

def test_one_hot_legacy(clickstream):
    """Call one_hot_encode with purely positional arguments.

    The lazy result must expose empty metadata, and the computed frame
    must have shape (100, 5) and a ``pd.MultiIndex`` index.
    """
    partitioned = dd.from_pandas(clickstream, npartitions=10)
    page_ids = list('ABCDE')
    index_columns = ['index', 'id']

    lazy_frame = one_hot_encode(partitioned, 'page_id', page_ids, index_columns)
    assert lazy_frame._meta.empty

    computed = lazy_frame.compute()
    assert computed.shape == (100, 5)
    assert isinstance(computed.index, pd.MultiIndex)

예제 #2

파일 보기

파일: test_dask_sparse_frame.py 프로젝트: kayibal/sparsity

def test_one_hot(clickstream):
    """Encode ``page_id`` via keyword arguments and check the computed frame.

    The computed result must have shape (100, 5) and a ``pd.MultiIndex``
    index.
    """
    encode_kwargs = {
        'column': 'page_id',
        'categories': list('ABCDE'),
        'index_col': ['index', 'id'],
    }
    partitioned = dd.from_pandas(clickstream, npartitions=10)

    computed = one_hot_encode(partitioned, **encode_kwargs).compute()

    assert computed.shape == (100, 5)
    assert isinstance(computed.index, pd.MultiIndex)

예제 #3

파일 보기

def test_dask_multi_index_loc(clickstream):
    """Slice a lazily encoded frame with ``.loc`` over a date-string range.

    After computing the slice, the smallest and largest dates found in
    index level 0 must equal the slice boundaries (2016-01-15 and
    2016-02-15).
    """
    category_map = {
        'page_id': list('ABCDE'),
        'other_categorical': list('FGHIJ'),
    }
    partitioned = dd.from_pandas(clickstream, npartitions=10)
    encoded = one_hot_encode(partitioned,
                             categories=category_map,
                             index_col=['index', 'id'])

    window = slice('2016-01-15', '2016-02-15')
    selected = encoded.loc[window].compute()

    # Level 0 exposes ``.date`` -- presumably a datetime-like index level.
    level0_dates = selected.index.get_level_values(0).date
    assert level0_dates.min() == dt.date(2016, 1, 15)
    assert level0_dates.max() == dt.date(2016, 2, 15)

예제 #4

파일 보기

def test_one_hot_disk_categories(clickstream):
    """Pass categories as a path to a pickled ``pd.Series`` instead of a list.

    The categories A-E are pickled into a temporary directory and the
    pickle path is handed to one_hot_encode.  The result must expose empty
    metadata, compute to shape (100, 5), and carry a ``pd.MultiIndex``.
    """
    with tmpdir() as scratch_dir:
        pickle_path = os.path.join(scratch_dir, 'cat.pickle')
        stored_categories = pd.Series(['A', 'B', 'C', 'D', 'E'])
        stored_categories.to_pickle(pickle_path)

        partitioned = dd.from_pandas(clickstream, npartitions=10)
        lazy_frame = one_hot_encode(
            partitioned,
            categories={'page_id': pickle_path},
            index_col=['index', 'id'],
        )
        assert lazy_frame._meta.empty

        # Compute inside the ``with`` block so the pickle file still exists.
        computed = lazy_frame.compute()
        assert computed.shape == (100, 5)
        assert isinstance(computed.index, pd.MultiIndex)

예제 #5

파일 보기

def test_set_index(clickstream):
    """Check that ``set_index(level=1)`` commutes with ``compute()``.

    Re-indexing after computing and re-indexing lazily before computing
    must produce equal dense frames.
    """
    category_map = {
        'page_id': list('ABCDE'),
        'other_categorical': list('FGHIJ'),
    }
    partitioned = dd.from_pandas(clickstream, npartitions=10)
    encoded = one_hot_encode(
        partitioned,
        categories=category_map,
        order=['other_categorical', 'page_id'],
        index_col=['index', 'id'],
    )

    # Compute first, then re-index the materialised frame.
    computed_then_indexed = encoded.compute().set_index(level=1)
    expected = computed_then_indexed.todense()

    # Re-index lazily, then compute.
    indexed_then_computed = encoded.set_index(level=1).compute()
    actual = indexed_then_computed.todense()

    pdt.assert_frame_equal(expected, actual)

예제 #6

파일 보기

def test_one_hot_prefixes_sep(clickstream):
    """Check column names when ``prefixes=True`` and ``sep='='`` are given.

    Each output column is expected to be named
    ``<source column>=<category>``.
    """
    partitioned = dd.from_pandas(clickstream, npartitions=10)
    category_map = {
        'page_id': list('ABCDE'),
        'other_categorical': list('FGHIJ'),
    }
    encoded = one_hot_encode(
        partitioned,
        categories=category_map,
        index_col=['index', 'id'],
        prefixes=True,
        sep='=',
    )

    expected_columns = ['page_id=' + letter for letter in 'ABCDE']
    expected_columns += ['other_categorical=' + letter for letter in 'FGHIJ']

    # Compare sorted so the check does not depend on column order.
    assert sorted(encoded.columns) == sorted(expected_columns)

예제 #7

파일 보기

def test_one_hot_no_order(clickstream):
    """Encode two categorical columns without passing ``order``.

    Both the lazy frame and the computed frame must contain exactly the
    columns A-J (compared after sorting).  The computed frame must have
    shape (100, 10) and a ``pd.MultiIndex`` index.
    """
    expected_sorted = list('ABCDEFGHIJ')
    category_map = {
        'page_id': list('ABCDE'),
        'other_categorical': list('FGHIJ'),
    }

    partitioned = dd.from_pandas(clickstream, npartitions=10)
    lazy_frame = one_hot_encode(partitioned,
                                categories=category_map,
                                index_col=['index', 'id'])
    assert lazy_frame._meta.empty
    assert sorted(lazy_frame.columns) == expected_sorted

    computed = lazy_frame.compute()
    assert computed.shape == (100, 10)
    assert isinstance(computed.index, pd.MultiIndex)
    assert sorted(computed.columns) == expected_sorted

예제 #8

파일 보기

def test_one_hot_dense_column(clickstream):
    """Encode with ``'id'`` mapped to ``False`` in ``categories``.

    The assertions expect an ``'id'`` column next to the ten category
    columns A-J, on both the lazy and the computed frame, giving a
    computed shape of (100, 11).  No ``index_col`` is passed.
    """
    category_map = {
        'page_id': list('ABCDE'),
        'other_categorical': list('FGHIJ'),
        'id': False,
    }
    partitioned = dd.from_pandas(clickstream, npartitions=10)
    lazy_frame = one_hot_encode(partitioned, categories=category_map)

    assert lazy_frame._meta.empty
    assert set(lazy_frame.columns) == set('ABCDEFGHIJ') | {'id'}

    computed = lazy_frame.compute()
    assert computed.shape == (100, 11)
    assert set(computed.columns) == set('ABCDEFGHIJ') | {'id'}

예제 #9

파일 보기

def test_one_hot_prefixes(clickstream):
    """Check column names when ``prefixes=True`` and no ``sep`` is given.

    Each output column is expected to be named
    ``<source column>_<category>``, on both the lazy and the computed
    frame.  The computed frame must have shape (100, 10) and a
    ``pd.MultiIndex`` index.
    """
    partitioned = dd.from_pandas(clickstream, npartitions=10)
    category_map = {
        'page_id': list('ABCDE'),
        'other_categorical': list('FGHIJ'),
    }
    lazy_frame = one_hot_encode(
        partitioned,
        categories=category_map,
        index_col=['index', 'id'],
        prefixes=True,
    )

    expected_columns = ['page_id_' + letter for letter in 'ABCDE']
    expected_columns += ['other_categorical_' + letter for letter in 'FGHIJ']

    assert lazy_frame._meta.empty
    assert sorted(lazy_frame.columns) == sorted(expected_columns)

    computed = lazy_frame.compute()
    assert computed.shape == (100, 10)
    assert isinstance(computed.index, pd.MultiIndex)
    assert sorted(computed.columns) == sorted(expected_columns)