def process_sensor_data(df):
    """Split a PM2.5-only sensor frame into AQI labels and timestamp features.

    Args:
        df: DataFrame containing at least a ``pm25`` column. A ``time``
            column, when present, is renamed to ``timestamp`` before the
            frame is passed to ``get_timestamp_features``.

    Returns:
        tuple: ``(result_raw, result_processed, air_data_df)`` where the
        first two items are the pair returned by
        ``get_timestamp_features`` and ``air_data_df`` is the pollutant
        frame with the two results of ``calc_AQI_from_df`` stored in the
        ``aqi`` and ``aqi_rank`` columns.

    Raises:
        KeyError: If ``df`` has no ``pm25`` column.
    """
    # ------------------------- Extract the labels -------------------------
    # Only PM2.5 is read from the input frame.
    air_data_df = df[['pm25']].copy()

    # NOTE(review): create_empty_pollutants_columns_ presumably adds
    # placeholder columns for the pollutants this sensor does not report;
    # confirm against its definition.
    empty_pol = ['o3_8', 'so2_24', 'pm10', 'co', 'so2', 'no2', 'o3']
    air_data_df = create_empty_pollutants_columns_(
        air_data_df, empty_pol, ['pm25'])

    # Attach the AQI value and its rank as label columns.
    aqi_values, aqi_ranks = calc_AQI_from_df(air_data_df)
    air_data_df['aqi'] = aqi_values
    air_data_df['aqi_rank'] = aqi_ranks

    # ----------------------- Add timestamp features -----------------------
    # The pollutant reading is a label, not a feature, so it is removed
    # before the timestamp features are derived.
    features_df = df.rename(columns={'time': 'timestamp'})
    features_df = features_df.drop(columns=['pm25'])
    result_raw, result_processed = get_timestamp_features(features_df)

    return result_raw, result_processed, air_data_df
def process_sensor_data(df):
    """Split a multi-pollutant sensor frame into AQI labels and features.

    Args:
        df: DataFrame containing the pollutant columns ``pm25``, ``pm10``,
            ``no2``, ``co``, ``so2`` and ``o3`` plus the ``heartbeat``,
            ``fah`` and ``image`` columns. A ``time`` column, when
            present, is renamed to ``timestamp``.

    Returns:
        tuple: ``(result_raw, result_processed, air_data_df)`` where the
        first two items are the pair returned by
        ``get_timestamp_features`` and ``air_data_df`` holds the pollutant
        columns plus ``aqi`` and ``aqi_rank``.

    Raises:
        KeyError: If any of the pollutant columns, ``heartbeat``, ``fah``
            or ``image`` is missing from ``df``.
    """
    pollutant_cols = ['pm25', 'pm10', 'no2', 'co', 'so2', 'o3']

    # ------------------------- Extract the labels -------------------------
    # 'o3_8' and 'so2_24' are not provided by the input, so they are filled
    # with zeros. The final column order is fixed by the reindex.
    # NOTE(review): presumably this is the order calc_AQI_from_df expects;
    # confirm against its definition.
    air_data_df = (
        df[pollutant_cols]
        .assign(o3_8=0, so2_24=0)
        .reindex(columns=[
            'o3_8', 'o3', 'pm10', 'pm25', 'co', 'so2', 'so2_24', 'no2'
        ])
    )

    # Attach the AQI value and its rank as label columns.
    aqi_values, aqi_ranks = calc_AQI_from_df(air_data_df)
    air_data_df['aqi'] = aqi_values
    air_data_df['aqi_rank'] = aqi_ranks

    # ----------------------- Add timestamp features -----------------------
    # Pollutant readings are labels; 'heartbeat', 'fah' and 'image' are
    # dropped as well before the timestamp features are derived.
    features_df = df.rename(columns={'time': 'timestamp'})
    features_df = features_df.drop(
        columns=pollutant_cols + ['heartbeat', 'fah', 'image'])
    result_raw, result_processed = get_timestamp_features(features_df)

    return result_raw, result_processed, air_data_df
def process_merged_data(df, model_path):
    """Build AQI labels, timestamp features and image features from merged data.

    Args:
        df: DataFrame containing the pollutant columns ``pm25``, ``pm10``,
            ``no2``, ``co``, ``so2`` and ``o3`` and a ``location`` column
            holding ``"<lat>,<lon>"`` strings. Verbose survey headers are
            shortened when present (see the rename table below).
        model_path: Passed through unchanged to
            ``Image_features.get_image_features``.

    Returns:
        tuple: ``(result_raw, result_processed, air_data_df)``. The first
        two items are the pair returned by ``get_timestamp_features``,
        each concatenated column-wise with the image features;
        ``air_data_df`` holds the pollutant columns plus ``aqi`` and
        ``aqi_rank``.

    Raises:
        KeyError: If a required pollutant column is missing from ``df``.
        ValueError: If a latitude or longitude cannot be parsed as a number.
    """
    pollutant_cols = ['pm25', 'pm10', 'no2', 'co', 'so2', 'o3']

    # ------------------------- Extract the labels -------------------------
    # 'o3_8' and 'so2_24' are not provided by the input, so they are filled
    # with zeros. The final column order is fixed by the reindex.
    # NOTE(review): presumably this is the order calc_AQI_from_df expects;
    # confirm against its definition.
    air_data_df = (
        df[pollutant_cols]
        .assign(o3_8=0, so2_24=0)
        .reindex(columns=[
            'o3_8', 'o3', 'pm10', 'pm25', 'co', 'so2', 'so2_24', 'no2'
        ])
    )

    # Attach the AQI value and its rank as label columns.
    aqi_values, aqi_ranks = calc_AQI_from_df(air_data_df)
    air_data_df['aqi'] = aqi_values
    air_data_df['aqi_rank'] = aqi_ranks

    # --------------------- Process timestamp features ---------------------
    # Shorten the verbose survey headers. Keys absent from ``df`` are
    # ignored by ``rename``.
    # NOTE(review): the last key is very long; confirm it matches the real
    # column header character for character (including whitespace).
    header_renames = {
        'time': 'timestamp',
        'image name': 'image',
        'greenness degree: 1 (building) -> 5 (greenness)': 'greenness degree',
        'cleanliness degree: 1 (filthy) -> 5 (cleanliness)': 'cleanliness degree',
        'crowdedness degree: (vehicle density: 1 (very light) -> 5 (high dense)/pedestrian density: 1 (very light) -> 5 (high dense)': 'crowdedness degree',
        'noisy degree: 1 (very quiet) -> 5 (very noisy)': 'noisy degree',
        'skin feeling degree: 1 (bad) -> 5 (good)': 'skin feeling degree',
        'stress degree: ( 1stressed, 2depressed, 3calm, 4relaxed, 5excited)': 'stress degree',
        'personal aqi degree: 1 (fresh air) -> 5 (absolute pollution)': 'personal aqi degree',
        'do you want to use this route so that you can protect your health and safety (i.e., avoid air pollution, congestion, and obstruction)? safety degree: 1 (not want at all) -> 5': 'safety degree',
    }
    # Pollutant readings are labels, not features.
    df_copy = df.rename(columns=header_renames).drop(columns=pollutant_cols)

    # Split "lat,lon" once and store both halves as numeric columns at
    # positions 4 and 5; the combined column is dropped afterwards.
    coords = df_copy.location.str.split(",")
    df_copy.insert(4, 'lat', pd.to_numeric(coords.str[0], errors='raise'))
    df_copy.insert(5, 'lon', pd.to_numeric(coords.str[1], errors='raise'))
    df_copy = df_copy.drop(columns=["location"])

    result_raw, result_processed = get_timestamp_features(df_copy)

    # ----------------------- Process image features -----------------------
    # Full image path = folder + OS separator + file name, row by row.
    image_path = (
        result_processed["image_folder_path"]
        + os.sep
        + result_processed["image"]
    ).to_frame("image_path")
    image_features = Image_features.get_image_features(image_path, model_path)

    result_raw = pd.concat([result_raw, image_features], axis=1)
    result_processed = pd.concat([result_processed, image_features], axis=1)

    return result_raw, result_processed, air_data_df
def process_merged_data(df, model_path):
    """Build AQI labels, timestamp features and image features from merged data.

    Args:
        df: DataFrame containing the pollutant columns ``pm25``, ``pm10``,
            ``no2``, ``co``, ``so2`` and ``o3`` and a ``location`` column
            holding ``"<lat>,<lon>"`` strings. Snake-case survey headers
            are renamed to their spaced form when present.
        model_path: Passed through unchanged to
            ``Image_features.get_image_features``.

    Returns:
        tuple: ``(result_raw, result_processed, air_data_df)``. The first
        two items are the pair returned by ``get_timestamp_features``,
        each concatenated column-wise with the image features;
        ``air_data_df`` holds the pollutant columns plus ``aqi`` and
        ``aqi_rank``.

    Raises:
        KeyError: If a required pollutant column is missing from ``df``.
        ValueError: If a latitude or longitude cannot be parsed as a number.
    """
    pollutant_cols = ['pm25', 'pm10', 'no2', 'co', 'so2', 'o3']

    # ------------------------- Extract the labels -------------------------
    # 'o3_8' and 'so2_24' are not provided by the input, so they are filled
    # with zeros. The final column order is fixed by the reindex.
    # NOTE(review): presumably this is the order calc_AQI_from_df expects;
    # confirm against its definition.
    air_data_df = (
        df[pollutant_cols]
        .assign(o3_8=0, so2_24=0)
        .reindex(columns=[
            'o3_8', 'o3', 'pm10', 'pm25', 'co', 'so2', 'so2_24', 'no2'
        ])
    )

    # Attach the AQI value and its rank as label columns.
    aqi_values, aqi_ranks = calc_AQI_from_df(air_data_df)
    air_data_df['aqi'] = aqi_values
    air_data_df['aqi_rank'] = aqi_ranks

    # --------------------- Process timestamp features ---------------------
    # Keys absent from ``df`` are ignored by ``rename``.
    # NOTE(review): ' health_and_safety' carries a leading space; it is
    # kept as-is, but verify it matches the actual column header.
    header_renames = {
        'time': 'timestamp',
        'greenness_degree': 'greenness degree',
        'cleanliness_degree': 'cleanliness degree',
        'crowdedness_degree': 'crowdedness degree',
        'noisy_degree': 'noisy degree',
        'skin_feeling_degree': 'skin feeling degree',
        'stress_degree': 'stress degree',
        'personal_aqi_degree': 'personal aqi degree',
        ' health_and_safety': 'safety degree',
    }
    # Pollutant readings are labels, not features.
    df_copy = df.rename(columns=header_renames).drop(columns=pollutant_cols)

    # Split "lat,lon" once and store both halves as numeric columns at
    # positions 4 and 5; the combined column is dropped afterwards.
    coords = df_copy.location.str.split(",")
    df_copy.insert(4, 'lat', pd.to_numeric(coords.str[0], errors='raise'))
    df_copy.insert(5, 'lon', pd.to_numeric(coords.str[1], errors='raise'))
    df_copy = df_copy.drop(columns=["location"])

    result_raw, result_processed = get_timestamp_features(df_copy)

    # ----------------------- Process image features -----------------------
    # Full image path = folder + OS separator + file name, row by row.
    image_path = (
        result_processed["image_folder_path"]
        + os.sep
        + result_processed["image"]
    ).to_frame("image_path")
    image_features = Image_features.get_image_features(image_path, model_path)

    result_raw = pd.concat([result_raw, image_features], axis=1)
    result_processed = pd.concat([result_processed, image_features], axis=1)

    return result_raw, result_processed, air_data_df