コード例 #1
0
def load_sterilization():
    """Load the sterilization CSV and label-encode its treatment column.

    Reads ``../data/sterilization.csv`` (treating ``#DIV/0!`` cells as
    missing), cleans the column names, and label-encodes ``treatment``.

    Returns:
        A ``(df, mapping)`` tuple where ``mapping`` maps each value of the
        ``treatment`` column to the matching value of ``treatment_enc``.
    """
    raw = pd.read_csv(
        '../data/sterilization.csv',
        na_values=['#DIV/0!'],
        na_filter=True,
    )
    cleaned = jn.DataFrame(raw).clean_names()
    df = cleaned.label_encode('treatment')
    # Pair every treatment label with its encoded counterpart, row by row.
    mapping = {
        label: code
        for label, code in zip(df['treatment'], df['treatment_enc'])
    }
    return df, mapping
コード例 #2
0
def test_multiindex_clean_names_method_chain(multiindex_dataframe):
    """Check that ``clean_names`` cleans every level of MultiIndex columns.

    Args:
        multiindex_dataframe: fixture dataframe whose columns are a
            two-level ``MultiIndex``.
    """
    df = jn.DataFrame(multiindex_dataframe).clean_names()

    # One list per index level; position i across the lists forms column i.
    arrays = [['a', 'bell_chart', 'decorated_elephant'],
              ['b', 'normal_distribution', 'r_i_p_rhino_']]

    # Build the index with from_arrays instead of the raw constructor: the
    # constructor's ``labels=`` keyword was renamed to ``codes=`` in pandas
    # 0.24 and later removed, so the old call fails on current pandas.
    expected_columns = pd.MultiIndex.from_arrays(arrays)
    assert set(df.columns) == set(expected_columns)
コード例 #3
0
def load_finches(path):
    """Load the finches CSV at ``path`` and apply the standard cleaning steps.

    Args:
        path: location of the CSV file, passed straight to ``pd.read_csv``.

    Returns:
        The cleaned Janitor dataframe.
    """
    raw = pd.read_csv(path)

    # Each step below is one data-cleaning method, applied in order.
    finches = jn.DataFrame(raw)  # wrap the dataframe in a Janitor dataframe
    finches = finches.clean_names()  # clean column names
    # Readability fixes: spell out the abbreviated beak measurements.
    finches = finches.rename_column('blength', 'beak_length')
    finches = finches.rename_column('bdepth', 'beak_depth')
    # Adds a `species_enc` column holding the species encoded numerically.
    finches = finches.label_encode('species')
    return finches
コード例 #4
0
ファイル: example.py プロジェクト: mamonu/pyjanitor
import pandas as pd
import janitor as jn

# Read the raw spreadsheet, then run the whole cleaning pipeline as a single
# method chain, one step per line.
df = pd.read_excel('dirty_data.xlsx')

df = (
    jn.DataFrame(df)
    .clean_names()
    .remove_empty()
    .rename_column('%_allocated', 'percent_allocated')
    .rename_column('full_time?', 'full_time')
    .coalesce(['certification', 'certification.1'], 'certification')
    .encode_categorical(['subject', 'employee_status', 'full_time'])
    .convert_excel_date('hire_date')
)

print(df)
コード例 #5
0
def test_rename_column(dataframe):
    """Check that ``rename_column`` replaces column ``a`` with ``index``."""
    cleaned = jn.DataFrame(dataframe).clean_names()
    renamed = cleaned.rename_column('a', 'index')
    expected = {'index', 'bell_chart', 'decorated_elephant'}
    assert set(renamed.columns) == expected
コード例 #6
0
def test_get_features_targets(dataframe):
    """Check the shapes returned when ``bell_chart`` is the target column."""
    cleaned = jn.DataFrame(dataframe).clean_names()
    features, target = cleaned.get_features_targets(
        target_columns='bell_chart')
    assert features.shape == (3, 2)
    assert target.shape == (3,)
コード例 #7
0
def test_clean_names_method_chain(dataframe):
    """Check that ``clean_names`` yields the expected cleaned column names."""
    cleaned = jn.DataFrame(dataframe).clean_names()
    expected = {'a', 'bell_chart', 'decorated_elephant'}
    assert set(cleaned.columns) == expected
コード例 #8
0
def test_fill_empty(null_df):
    """Check that ``fill_empty`` leaves only the fill value in column '2'."""
    filled = jn.DataFrame(null_df).fill_empty(columns=['2'], value=3)
    remaining_values = set(filled.loc[:, '2'])
    assert remaining_values == {3}
コード例 #9
0
def test_convert_excel_date():
    """Check that ``convert_excel_date`` gives ``hire_date`` dtype M8[ns]."""
    raw = pd.read_excel('examples/dirty_data.xlsx')
    cleaned = jn.DataFrame(raw).clean_names()
    converted = convert_excel_date(cleaned, 'hire_date')

    hire_date_dtype = converted['hire_date'].dtype
    assert hire_date_dtype == 'M8[ns]'
コード例 #10
0
def load_kruschke():
    """Load ``../data/iq.csv`` and label-encode its ``treatment`` column.

    The first CSV column is used as the index.

    Returns:
        The Janitor dataframe produced by ``label_encode('treatment')``.
    """
    # Note from the original author: comment out the path to the file for
    # students.
    raw = pd.read_csv('../data/iq.csv', index_col=0)
    return jn.DataFrame(raw).label_encode('treatment')
コード例 #11
0
ファイル: example.py プロジェクト: smeichle/pyjanitor
import pandas as pd
import janitor as jn

# Load the raw spreadsheet and wrap it in a Janitor dataframe.
df = jn.DataFrame(pd.read_excel("dirty_data.xlsx"))

# Apply the cleaning steps one at a time, in the same order as a method chain.
df = df.clean_names()
df = df.remove_empty()
df = df.rename_column("%_allocated", "percent_allocated")
df = df.rename_column("full_time?", "full_time")
df = df.coalesce(["certification", "certification.1"], "certification")
df = df.encode_categorical(["subject", "employee_status", "full_time"])
df = df.convert_excel_date("hire_date")

print(df)