def clean_data(data):
    """Return a copy of *data* without a fixed set of rows and with no NaNs.

    The rows whose index labels are 141, 142, 149, 153 and 158 are removed
    (per the original author's note these are the problematic rows with a
    zero ``asthma_rate`` -- that is not verified here), then every remaining
    missing value is replaced with 0.

    The input frame is not modified. With pandas' default ``errors='raise'``
    a ``KeyError`` is raised if any of the labels is absent from the index.
    """
    # Index labels (not positions) of the rows to discard.
    bad_row_labels = [141, 142, 149, 153, 158]
    trimmed = data.drop(index=bad_row_labels)
    # Replace NaNs with zeros.
    return trimmed.fillna(0)
Beispiel #2
0
def feature_selection(data):
    """Drop the columns that are not used as features.

    Args:
        data: A pandas DataFrame that contains every column named in
            ``drop_columns`` below; any other columns are kept as-is and in
            their original order.

    Returns:
        A ``(frame, columns)`` tuple: the reduced DataFrame (a new object,
        the input is left untouched) and its column index.

    Raises:
        KeyError: If any column listed in ``drop_columns`` is missing from
            *data* (pandas' default ``errors='raise'`` behaviour).
    """
    # Columns excluded from the feature set. Besides the pollutant/health
    # measures this also removes 'co' and 'ca' -- presumably identifier
    # columns; confirm against the dataset schema.
    drop_columns = [
        'pm10_mean', 'pm25_mean', 'pm25non_mean', 'pm25spec_mean', 'co_mean',
        'no2_mean', 'ozo_mean', 'nonox_mean', 'lead_mean', 'haps_mean',
        'vocs_mean', 'pcp', 'high_sch_grad', 'income_ineq', 'co', 'ca',
    ]

    data = data.drop(columns=drop_columns)

    return data, data.columns
def clean_data(data):
    """Remove five hard-coded rows from *data* and zero-fill missing values.

    Rows are selected by index label (141, 142, 149, 153, 158); the original
    note describes them as problematic rows with a zero ``asthma_rate``.
    A new DataFrame is returned and the argument is left unchanged. A
    ``KeyError`` is raised by pandas when one of the labels is not present.
    """
    return data.drop(labels=[141, 142, 149, 153, 158], axis=0).fillna(0)