Esempi in Python per DataFrame.set_index

Linguaggio di programmazione: Python

Spazio dei nomi/nome del pacchetto: cudf.dataframe

Classe/tipologia: DataFrame

Metodo/funzione: set_index

Esempi su hotexamples.com: 9

DataFrame.set_index in Python: 9 esempi trovati. Questi sono i migliori esempi reali in Python per cudf.dataframe.DataFrame.set_index, estratti da progetti open source. Puoi valutarli per aiutarci a migliorare la qualità dei nostri esempi.

Metodi utilizzati di frequente

Mostra / Nascondi

from_pandas(30)

DataFrame(30)

groupby(15)

merge(13)

set_index(9)

query(6)

copy(5)

one_hot_encoding(5)

from_records(4)

label_encoding(3)

as_gpu_matrix(2)

partition_by_hash(2)

from_arrow(2)

as_matrix(2)

replace(1)

sort_values(1)

set_tdf(1)

join(1)

nsmallest(1)

nlargest(1)

index(1)

head(1)

hash_columns(1)

get_tdf(1)

assign(1)

to_arrow(1)

Esempio n. 1

Mostra file

File: test_joining.py Progetto: xiaolin1990/cudf

def test_dataframe_join_cats():
    """Join two frames on a shared categorical index and compare with pandas.

    Both sides use column ``'a'`` (categories ``a``/``b``/``c``) as their
    index. The joined result is checked against the equivalent pandas join
    and then sanity-checked for its columns, index and values.
    """
    lhs = DataFrame()
    lhs['a'] = pd.Categorical(list('aababcabbc'), categories=list('abc'))
    lhs['b'] = bb = np.arange(len(lhs))
    lhs = lhs.set_index('a')

    rhs = DataFrame()
    rhs['a'] = pd.Categorical(list('abcac'), categories=list('abc'))
    rhs['c'] = cc = np.arange(len(rhs))
    rhs = rhs.set_index('a')

    got = lhs.join(rhs)
    expect = lhs.to_pandas().join(rhs.to_pandas())

    # Note: pandas makes an object Index after joining, so both sides drop
    # their index before the frames are compared.
    # ``pd.testing`` is the public location of the assertion helpers;
    # ``pd.util.testing`` is deprecated and missing from recent pandas.
    pd.testing.assert_frame_equal(
        got.sort_values(by='b').to_pandas().sort_index().reset_index(
            drop=True), expect.reset_index(drop=True))

    # Just do some rough checking here.
    assert list(got.columns) == ['b', 'c']
    assert len(got) > 0
    assert set(got.index.values) & set('abc')
    assert set(got['b']) & set(bb)
    assert set(got['c']) & set(cc)

Esempio n. 2

Mostra file

File: test_joining.py Progetto: xiaolin1990/cudf

def test_dataframe_join_suffix():
    """Check that joining frames with overlapping columns needs suffixes.

    A join without ``lsuffix``/``rsuffix`` must raise ``ValueError``. With
    both suffixes supplied, the result is compared column by column against
    the equivalent pandas join.
    """
    np.random.seed(0)

    df = DataFrame()
    for name in 'abc':
        df[name] = np.random.randint(0, 5, 5)

    left = df.set_index('a')
    right = df.set_index('c')

    # Without suffixes the overlapping column names must be rejected.
    expected_message = ("there are overlapping columns but lsuffix"
                        " and rsuffix are not defined")
    with pytest.raises(ValueError) as raises:
        left.join(right)
    raises.match(expected_message)

    suffixes = dict(lsuffix='_left', rsuffix='_right')
    got = left.join(right, sort=True, **suffixes)

    # Build the reference result with pandas from the same data.
    pddf = df.to_pandas()
    pd_left = pddf.set_index('a')
    pd_right = pddf.set_index('c')
    expect = pd_left.join(pd_right, **suffixes)

    # Columns, index and every column's values must agree.
    assert list(expect.columns) == list(got.columns)
    assert np.all(expect.index.values == got.index.values)
    for col in expect.columns:
        _check_series(expect[col].fillna(-1), got[col].fillna(-1))

Esempio n. 3

Mostra file

def test_df_cat_sort_index():
    """Sort a frame by its categorical index and compare with pandas."""
    letters = pd.Categorical(list('aababcabbc'), categories=list('abc'))

    df = DataFrame()
    df['a'] = letters
    df['b'] = np.arange(len(df))

    indexed = df.set_index('a')
    got = indexed.sort_index()

    pd_indexed = df.to_pandas().set_index('a')
    expect = pd_indexed.sort_index()

    # Same columns, same index order, same values in column 'b'.
    assert list(expect.columns) == list(got.columns)
    assert list(expect.index.values) == list(got.index.values)
    np.testing.assert_array_equal(expect.index.values, got.index.values)
    np.testing.assert_array_equal(expect['b'].values, got['b'].to_array())

Esempio n. 4

Mostra file

def test_df_set_index_from_series():
    """Use a Series as the new index and slice the result by label."""
    df = DataFrame()
    df['a'] = list(range(10))
    df['b'] = list(range(0, 20, 2))

    # Check set_index(Series): both columns are still present afterwards.
    indexed = df.set_index(df['b'])
    assert list(indexed.columns) == ['a', 'b']

    # Label slice 2..6 over the even-valued index.
    window = indexed.loc[2:6]
    print(window)
    assert len(window) == 3
    assert list(window.index.values) == [2, 4, 6]

Esempio n. 5

Mostra file

def test_nonmatching_index_setitem(nrows):
    """Assign a plain array as a column of a frame with a custom index.

    ``nrows`` is the length of every generated column (presumably supplied
    by a pytest parametrization that is not visible here).
    """
    np.random.seed(0)
    upper = 2147483647

    gdf = DataFrame()
    gdf['a'] = np.random.randint(upper, size=nrows)
    gdf['b'] = np.random.randint(upper, size=nrows)
    gdf = gdf.set_index('b')

    test_values = np.random.randint(upper, size=nrows)
    gdf['c'] = test_values

    # The new column keeps its length and takes on the frame's index.
    assert len(test_values) == len(gdf['c'])
    expected = Series(test_values).set_index(gdf._index).to_pandas()
    assert gdf['c'].to_pandas().equals(expected)

Esempio n. 6

Mostra file

def test_df_cat_set_index():
    """Set a categorical column as the index and compare with pandas."""
    letters = pd.Categorical(list("aababcabbc"), categories=list("abc"))

    df = DataFrame()
    df["a"] = letters
    df["b"] = np.arange(len(df))

    got = df.set_index("a")
    expect = df.to_pandas().set_index("a")

    # Same columns, same index values, same values in column "b".
    assert list(expect.columns) == list(got.columns)
    assert list(expect.index.values) == list(got.index.values)
    np.testing.assert_array_equal(expect.index.values, got.index.values)
    np.testing.assert_array_equal(expect["b"].values, got["b"].to_array())

Esempio n. 7

Mostra file

def test_df_set_index_from_name():
    """Use a column name as the new index and slice the result by label."""
    df = DataFrame()
    df['a'] = list(range(10))
    df['b'] = list(range(0, 20, 2))

    # Check set_index(column_name)
    indexed = df.set_index('b')
    print(indexed)

    # One column fewer, because 'b' is now the index.
    assert list(indexed.columns) == ['a']

    # Label slice 2..6 over the even-valued index.
    window = indexed.loc[2:6]
    print(window)
    assert len(window) == 3
    assert list(window.index.values) == [2, 4, 6]

Esempio n. 8

Mostra file

def test_categorical_index():
    """Check that a categorical index survives both construction paths.

    One frame is converted from a pandas frame that already has the
    categorical index; the other is built column by column and indexed
    afterwards. Both must match the pandas reference.
    """
    pdf = pd.DataFrame()
    pdf['a'] = [1, 2, 3]
    pdf['index'] = pd.Categorical(['a', 'b', 'c'])
    pdf = pdf.set_index('index')
    converted = DataFrame.from_pandas(pdf)

    assembled = DataFrame()
    assembled['a'] = [1, 2, 3]
    assembled['index'] = pd.Categorical(['a', 'b', 'c'])
    assembled = assembled.set_index('index')

    # Run the same checks on each frame, converted one first.
    for gdf in (converted, assembled):
        assert isinstance(gdf.index, CategoricalIndex)
        assert_eq(pdf, gdf)
        assert_eq(pdf.index, gdf.index)

Esempio n. 9

Mostra file

File: test_index.py Progetto: zeichuan/cudf

def test_categorical_index():
    """Check that a categorical index and its codes match pandas.

    One frame is converted from a pandas frame that already has the
    categorical index; the other is built column by column and indexed
    afterwards. Before indexing, the second frame's default index is
    compared with that of a frame converted from the un-indexed pandas data.
    """
    pdf = pd.DataFrame()
    pdf["a"] = [1, 2, 3]
    pdf["index"] = pd.Categorical(["a", "b", "c"])
    initial_df = DataFrame.from_pandas(pdf)
    pdf = pdf.set_index("index")
    converted = DataFrame.from_pandas(pdf)

    assembled = DataFrame()
    assembled["a"] = [1, 2, 3]
    assembled["index"] = pd.Categorical(["a", "b", "c"])
    # The default indexes must agree before set_index is applied.
    assert_eq(initial_df.index, assembled.index)
    assembled = assembled.set_index("index")

    # Run the same checks on each frame, converted one first.
    for gdf in (converted, assembled):
        assert isinstance(gdf.index, CategoricalIndex)
        assert_eq(pdf, gdf)
        assert_eq(pdf.index, gdf.index)
        assert_eq(pdf.index.codes, gdf.index.codes.to_array())