Esempio n. 1
0
def _create_exploration_df(example_test, example_train, is_numeric=False):
    """Run a single-column exploration and return the entry for that column.

    The test/train data are built by ``_create_testing_dataframes`` using the
    single column name ``"column1"`` and then wrapped in a
    ``DataPreprocessor``.

    Args:
        example_test: Data forwarded to ``_create_testing_dataframes`` as the
            test-side input.
        example_train: Data forwarded to ``_create_testing_dataframes`` as the
            train-side input.
        is_numeric: When true, ``explore_numeric_columns()`` is used;
            otherwise ``explore_factors()`` is used.

    Returns:
        The ``"column1"`` entry of the exploration result.

    Raises:
        AssertionError: If the exploration does not contain exactly one entry.
    """
    column_names = ["column1"]
    test_frame, train_frame = _create_testing_dataframes(
        column_names, example_test, example_train)
    preprocessor = DataPreprocessor(train_df=train_frame, test_df=test_frame)

    # Choose the exploration method first, then invoke it once.
    explore = (preprocessor.explore_numeric_columns if is_numeric
               else preprocessor.explore_factors)
    exploration = explore()

    # Only one column was supplied, so exactly one entry is expected.
    assert len(exploration) == 1
    return exploration["column1"]
Esempio n. 2
0
"""In this example we prove out what we have developed; it also
allows us to make some decisions about what we'll do next.

Unlike tests, which are forced to be very fast,
examples can handle more realistic data."""
from data_loader import DataLoader
from data_preprocessor import DataPreprocessor
from settings import ADULT_TRAIN_DATA_PATH, ADULT_COLUMN_NAMES, ADULT_TEST_DATA_PATH

if __name__ == '__main__':
    # Load the train split first, then the test split, using the same
    # column names for both.
    loader = DataLoader()
    train_df, test_df = (
        loader.load_relative(path=data_path, columns=ADULT_COLUMN_NAMES)
        for data_path in (ADULT_TRAIN_DATA_PATH, ADULT_TEST_DATA_PATH)
    )

    # Explore the data: one printed section per kind of column.
    prep = DataPreprocessor(train_df, test_df)
    separator = "#" * 40
    sections = (
        ("FACTOR COLUMNS", prep.explore_factors),
        ("NUMERIC COLUMNS", prep.explore_numeric_columns),
    )
    for heading, explore in sections:
        print(heading, separator)
        prep.print_exploration(explore())