def clean_data(data, drop_rows=(141, 142, 149, 153, 158)):
    """Drop known-bad rows and replace missing values with zero.

    Args:
        data: pandas DataFrame to clean. It is not modified; a new frame
            is returned.
        drop_rows: Index labels of the rows to remove. The default labels
            are the ones the original author flagged as problematic
            (per the original comment: rows with a zero ``asthma_rate``).

    Returns:
        A new DataFrame without the ``drop_rows`` rows and with every NaN
        replaced by 0.

    Raises:
        KeyError: If any label in ``drop_rows`` is not present in the
            index of ``data`` (pandas' default ``errors='raise'``).
    """
    # Pass a list, not the tuple: pandas treats a bare tuple as ONE label
    # (a MultiIndex key) rather than as a collection of labels.
    trimmed = data.drop(list(drop_rows))
    return trimmed.fillna(0)
def feature_selection(data):
    """Remove the columns that are not used as features.

    Args:
        data: pandas DataFrame containing at least every column named in
            the drop list below. It is not modified.

    Returns:
        A tuple ``(reduced, columns)`` where ``reduced`` is a new
        DataFrame without the dropped columns and ``columns`` is
        ``reduced.columns``.

    Raises:
        KeyError: If any column in the drop list is missing from ``data``
            (pandas' default ``errors='raise'``).
    """
    drop_columns = [
        'pm10_mean',
        'pm25_mean',
        'pm25non_mean',
        'pm25spec_mean',
        'co_mean',
        'no2_mean',
        'ozo_mean',
        'nonox_mean',
        'lead_mean',
        'haps_mean',
        'vocs_mean',
        'pcp',
        'high_sch_grad',
        'income_ineq',
        'co',
        'ca',
    ]
    reduced = data.drop(drop_columns, axis=1)
    return reduced, reduced.columns
# NOTE(review): a function with this same name is defined earlier in this
# view; if both live in one module, this later definition is the one that
# takes effect. Confirm whether the duplicate is intentional.
def clean_data(data, drop_rows=(141, 142, 149, 153, 158)):
    """Drop known-bad rows and replace missing values with zero.

    Args:
        data: pandas DataFrame to clean. It is not modified; a new frame
            is returned.
        drop_rows: Index labels of the rows to remove. The default labels
            are the ones the original author flagged as problematic
            (per the original comment: rows with a zero ``asthma_rate``).

    Returns:
        A new DataFrame without the ``drop_rows`` rows and with every NaN
        replaced by 0.

    Raises:
        KeyError: If any label in ``drop_rows`` is not present in the
            index of ``data`` (pandas' default ``errors='raise'``).
    """
    # Pass a list, not the tuple: pandas treats a bare tuple as ONE label
    # (a MultiIndex key) rather than as a collection of labels.
    without_bad_rows = data.drop(list(drop_rows))
    return without_bad_rows.fillna(0)