def get_resampled_data(subject_id, visit_number):
    """Load a Peeps personal Airspeck recording, blank implausible GPS fixes and average it per minute.

    The recording is loaded as a manual upload, not minute-averaged and without PM/gas calibration. GPS coordinates
    are set to NaN on every row whose ``gpsAccuracy`` exceeds 1000 or whose position lies outside the box
    28.4 <= latitude <= 28.9, 76.8 <= longitude <= 77.6 (presumably the Delhi study area -- confirm). All other
    columns of those rows are kept.

    NOTE(review): SOURCE contains a second top-level ``get_resampled_data(subject_id)``. If both definitions live in
    the same module, the later one replaces this one.

    Args:
        subject_id: Subject identifier handed to ``load_personal_airspeck_file``.
        visit_number: Forwarded to the loader as ``subject_visit_number``.

    Returns:
        Tuple ``(calibration_date_pers, is_calibrated_pm_pers, airspeck)``. The first two items are passed through
        unchanged from the loader; ``airspeck`` is the GPS-cleaned recording averaged into 1-minute bins.
    """
    # NOTE(review): three values are unpacked although return_calibration_flag=False is passed -- confirm that the
    # loader returns a 3-tuple for this combination of arguments.
    calibration_date_pers, is_calibrated_pm_pers, airspeck_raw = load_personal_airspeck_file(
        subject_id,
        subject_visit_number=visit_number,
        project_name='peeps',
        upload_type='manual',
        is_minute_averaged=False,
        calibrate_pm_and_gas=False,
        return_calibration_flag=False)

    # gpsAccuracy is compared as loaded; no numeric conversion is applied for manual uploads.
    latitude = airspeck_raw['gpsLatitude']
    longitude = airspeck_raw['gpsLongitude']
    implausible_gps = ((airspeck_raw['gpsAccuracy'] > 1000)
                       | (latitude < 28.4) | (latitude > 28.9)
                       | (longitude < 76.8) | (longitude > 77.6))

    # The columns are named explicitly instead of using a 'gpsLongitude':'gpsLatitude' label slice. The slice only
    # selects both columns when they appear in exactly that order in the file (it had to be flipped for automatic
    # uploads) and would also blank any column stored between them.
    airspeck_raw.loc[implausible_gps, ['gpsLatitude', 'gpsLongitude']] = np.nan

    airspeck = airspeck_raw.resample('1min').mean()
    return calibration_date_pers, is_calibrated_pm_pers, airspeck
def get_resampled_data(subject_id):
    """Load a british-heart personal Airspeck recording, blank implausible GPS fixes and average it per minute.

    The recording is loaded as an automatic upload with PM/gas calibration enabled. ``gpsAccuracy`` is converted to
    a numeric column, then GPS coordinates are set to NaN on every row whose accuracy exceeds 1000 or whose position
    lies outside the box 49.88 <= latitude <= 55.79, -5.9 <= longitude <= 1.8 (presumably the British study area --
    confirm). All other columns of those rows are kept.

    NOTE(review): SOURCE contains another top-level ``get_resampled_data(subject_id, visit_number)``. If both
    definitions live in the same module, only the later one is reachable by name.

    Args:
        subject_id: Subject identifier handed to ``load_personal_airspeck_file``.

    Returns:
        The GPS-cleaned recording averaged into 1-minute bins. The calibration date and calibration flag returned
        by the loader are discarded.
    """
    _calibration_date, _is_calibrated_pm, airspeck_raw = load_personal_airspeck_file(
        subject_id,
        'british-heart',
        upload_type='automatic',
        calibrate_pm_and_gas=True,
        return_calibration_flag=True)

    # Raises if a value cannot be parsed as a number (pd.to_numeric default behaviour).
    airspeck_raw['gpsAccuracy'] = pd.to_numeric(airspeck_raw['gpsAccuracy'])

    latitude = airspeck_raw['gpsLatitude']
    longitude = airspeck_raw['gpsLongitude']
    implausible_gps = ((airspeck_raw['gpsAccuracy'] > 1000)
                       | (latitude < 49.88) | (latitude > 55.79)
                       | (longitude < -5.9) | (longitude > 1.8))

    # The columns are named explicitly instead of using a 'gpsLatitude':'gpsLongitude' label slice. The slice only
    # selects both columns when they appear in exactly that order in the file and would also blank any column
    # stored between them.
    airspeck_raw.loc[implausible_gps, ['gpsLatitude', 'gpsLongitude']] = np.nan

    return airspeck_raw.resample('1min').mean()
def plot_philap_report_graphs_for_subject(subject_id, graphs_dir):
    """Create the report figures, map and statistics file for one subject.

    The following files are written, each path built as ``graphs_dir + <name>`` (plain string concatenation, so
    ``graphs_dir`` must already end with a path separator):

    * ``<subject_id>_detailed_exposure.png``: minute-level personal PM2.5 with the minutes spent at home shaded and
      the stationary home sensor overlaid.
    * ``<subject_id>_mean_exposure.png``: bar chart of the mean PM2.5 at home, elsewhere, overall and from the
      stationary home sensor.
    * ``<subject_id>_airspeck_map.png``: produced by ``get_maps_image`` from the GPS-cleaned raw recording.
    * ``<subject_id>_stats.txt``: step count, night/day breathing rate and recording times.

    Both figures are additionally displayed with ``plt.show()``.

    Args:
        subject_id: Subject whose manually uploaded Airspeck and RESpeck recordings are loaded.
        graphs_dir: Output directory prefix, see above.

    Returns:
        None. When the recordings cannot be loaded, a hint is printed and nothing is plotted.
    """
    participant_details = load_philap_participant_details()

    # The data has to be on disk already. Previously a bare ``except`` printed the hint and execution then continued
    # into code that used the never-assigned recordings; now the function stops cleanly.
    try:
        airspeck_raw = load_personal_airspeck_file(subject_id, upload_type='manual', is_minute_averaged=False)
        respeck = load_respeck_file(subject_id, upload_type='manual')
    except Exception as error:
        print("Could not load the recordings of subject {}: {}".format(subject_id, error))
        airspeck_raw = respeck = None

    if airspeck_raw is None or respeck is None or len(airspeck_raw) == 0:
        print(
            "Please download all Peeps data via download_all_philap_data(raw_airspeck=True) "
            "before calling this function")
        return

    # Delete incorrect GPS: fixes with an accuracy value above 1000 or outside a coarse bounding box
    # (latitude 10..40, longitude 10..80). The columns are named explicitly so that the result does not depend on
    # the column order in the file.
    latitude = airspeck_raw['gpsLatitude']
    longitude = airspeck_raw['gpsLongitude']
    implausible_gps = ((airspeck_raw['gpsAccuracy'] > 1000)
                       | (latitude < 10) | (latitude > 40)
                       | (longitude < 10) | (longitude > 80))
    airspeck_raw.loc[implausible_gps, ['gpsLatitude', 'gpsLongitude']] = np.nan

    # Average per minute only after the GPS clean-up, so that the home estimate and the home mask below are not
    # distorted by the discarded fixes.
    airspeck = airspeck_raw.resample('1min').mean()

    # Estimate the home position as the mean position during the night (hours 2 and 3).
    night_hours = (1 < airspeck.index.hour) & (airspeck.index.hour <= 3)
    home_gps = airspeck.loc[night_hours].mean()

    # If there was no personal data during the night, fall back on the GPS coordinates the researchers provided
    if pd.isnull(home_gps['gpsLatitude']):
        home_gps = get_home_gps_for_subject(subject_id, participant_details)

    # Select locations near home. The tolerance grows with the reported GPS accuracy value.
    radius_home = 0.002
    correction_factor = airspeck['gpsAccuracy'] * 0.00001
    tolerance = radius_home + correction_factor
    home_mask = (np.abs(airspeck['gpsLatitude'] - home_gps['gpsLatitude']) < tolerance) & \
                (np.abs(airspeck['gpsLongitude'] - home_gps['gpsLongitude']) < tolerance)

    ##################################
    # Draw detailed exposure plot
    ##################################
    sns.set_style('darkgrid', {'xtick.bottom': True, 'xtick.major.size': 5})
    fig, ax = plt.subplots(figsize=(15, 5))

    # Shade every minute spent at home.
    if np.count_nonzero(home_mask) > 0:
        for ts in airspeck.loc[home_mask].index:
            ax.axvspan(ts, ts + pd.DateOffset(minutes=1), facecolor=CB_color_cycle[0], alpha=0.3, zorder=1)

    ax.scatter(airspeck.index, airspeck['pm2_5'], s=2, color='black', zorder=2)

    # Plot stationary airspeck home
    home_airspeck = load_static_airspeck_file(subject_id, suffix_filename='_home')
    if home_airspeck is not None and len(home_airspeck) > 0:
        ax.scatter(home_airspeck.index, home_airspeck['pm2_5'], s=2, color='blue')

    ax.set_ylabel("PM2.5 (μg/m³)")

    # Extend the x axis to whole days around the recording.
    start = airspeck.index[0].replace(hour=0, minute=0, second=0)
    end = airspeck.index[-1].replace(hour=0, minute=0, second=0) + pd.DateOffset(days=1)
    ax.set_xlim(start, end)

    # Tick labels are rendered in the time zone stored in project_mapping['philap'][1].
    formatter = mdates.DateFormatter('%d.%m %Hh', tz=dateutil.tz.gettz(project_mapping['philap'][1]))
    ax.xaxis.set_major_formatter(formatter)

    ax.set_title("Continuous PM2.5 personal exposure levels and ambient concentrations")
    fig.autofmt_xdate()

    # Hand-built legend entries: a patch for the shading and marker-only lines for the two scatter series.
    home_patch = mpatches.Patch(color=CB_color_cycle[0], label='Home', alpha=0.3)
    airs_home_patch = Line2D(range(1), range(1), marker='o', color='#00000000', markerfacecolor="blue",
                             label='Home sensor')
    airp_patch = Line2D(range(1), range(1), marker='o', color='#00000000', markerfacecolor="black",
                        label='Personal sensor')
    plt.legend(handles=[home_patch, airp_patch, airs_home_patch])

    plt.tight_layout()
    plt.savefig(graphs_dir + "{}_detailed_exposure.png".format(subject_id), dpi=300)
    plt.show()

    ##################################
    # Draw summary bar graph
    ##################################
    sns.set_style('darkgrid', {'xtick.bottom': False, 'xtick.major.size': 0.0})

    home = airspeck.loc[home_mask, 'pm2_5'].mean()
    other = airspeck.loc[~home_mask, 'pm2_5'].mean()
    overall = airspeck['pm2_5'].mean()
    if home_airspeck is not None:
        home_ambient = home_airspeck['pm2_5'].mean()
    else:
        home_ambient = np.nan

    mean_values = [home, other, overall, home_ambient]
    bar_positions = np.arange(len(mean_values))

    fig, ax = plt.subplots(figsize=(8, 5))
    ax.set_title("Mean PM2.5 personal exposure levels and ambient concentrations")
    ax.bar(bar_positions, mean_values, width=0.5, color=CB_color_cycle, edgecolor="none")
    ax.set_ylabel("PM2.5 (μg/m³)")
    ax.set_xlim(-0.5, len(mean_values) - 0.5)
    plt.xticks(bar_positions, ["Home\npersonal", "Other\npersonal", "Overall\npersonal", "Home\nambient"])

    plt.savefig(graphs_dir + "{}_mean_exposure.png".format(subject_id), dpi=300)
    plt.show()

    ##################################
    # Draw map
    ##################################
    get_maps_image(airspeck_raw, graphs_dir + "{}_airspeck_map.png".format(subject_id), zoom=13)

    ##################################
    # Other statistics
    ##################################
    # NOTE(review): hour 0 belongs to neither the night (hours 1..5) nor the day (hours 6..23) selection -- confirm
    # that leaving out the first hour after midnight is intended.
    night_breathing = respeck.loc[(0 < respeck.index.hour) & (respeck.index.hour < 6), 'breathing_rate'].mean()
    day_breathing = respeck.loc[(6 <= respeck.index.hour) & (respeck.index.hour <= 23), 'breathing_rate'].mean()

    # Mode 'w' creates the file or empties an existing one, so no separate truncation step is needed.
    with open(graphs_dir + "{}_stats.txt".format(subject_id), 'w') as f:
        f.write("Step count: {}\n".format(respeck['step_count'].sum()))
        f.write("Mean breathing rate during night: {:.2f} breaths per minute\n".format(night_breathing))
        f.write("Mean breathing rate during day: {:.2f} breaths per minute\n".format(day_breathing))

        f.write("\nStart of recording: {}\n".format(airspeck.index[0].replace(tzinfo=None)))
        f.write("End of recording: {}\n".format(airspeck.index[-1].replace(tzinfo=None)))
        f.write("Total duration: {}\n".format(airspeck.index[-1] - airspeck.index[0]))

        # One row of the minute-averaged recording equals one minute at home.
        f.write("Total recording time at home: {:.1f} h\n".format(np.count_nonzero(home_mask) / 60.))
def create_dublin_pixelgram_for_subject(
        subject_id, overwrite_pixelgram_if_already_exists=False):
    """Download a Dublin subject's data and draw the combined pixelgram with exposure and recording periods marked.

    The exposure and recording periods are assembled from the date and time-of-day columns of the participant
    details. Each is shifted by the whole-hour correction listed for the subject in the spreadsheet at
    ``dublin_timezones_correction_filepath`` (missing values count as 0), then localised to the time zone found in
    ``project_mapping`` under the first three characters of ``subject_id``.

    Args:
        subject_id: Subject to process; also the row label in the participant details and the correction sheet.
        overwrite_pixelgram_if_already_exists: Forwarded to ``plot_combined_pixelgram_dublin`` as
            ``overwrite_if_already_exists``.
    """
    download_respeck_and_personal_airspeck_data(subject_id, upload_type='manual')

    respeck_data = load_respeck_file(subject_id, upload_type='manual')
    airspeck_data = load_personal_airspeck_file(subject_id, upload_type='manual')

    # Load correction factors for timezone
    correction_sheet = pd.read_excel(dublin_timezones_correction_filepath)
    corrections = correction_sheet.replace(np.nan, 0).set_index('subject_id')

    participant_details = load_dublin_participant_details()
    row = participant_details.loc[subject_id]

    def corrected_datetime(day, time_of_day, correction_column):
        # Put the time of day onto the date, convert to a plain datetime and add the per-subject hour offset.
        moment = day.replace(
            hour=time_of_day.hour,
            minute=time_of_day.minute,
            second=time_of_day.second).to_pydatetime()
        offset_hours = int(corrections.loc[subject_id, correction_column])
        return moment + timedelta(hours=offset_hours)

    # Load exposure period
    exposure_from = row['start_of_exposure_time_to_shs']
    exposure_to = row['end_of_exposure_time_to_shs']
    start_exposure = corrected_datetime(
        row['date_of_exposure_to_shs'], exposure_from, 'shs_times_difference')

    # Without a separate end date the exposure ends on the day it started.
    if pd.isnull(row['end_date_of_exposure_to_shs']):
        exposure_end_day = row['date_of_exposure_to_shs']
    else:
        exposure_end_day = row['end_date_of_exposure_to_shs']
    end_exposure = corrected_datetime(
        exposure_end_day, exposure_to, 'shs_times_difference')

    # Load recording period
    recording_from = row['start_time_of_monitoring']
    start_recording = corrected_datetime(
        row['start_date_of_monitoring'], recording_from, 'recording_times_difference')
    recording_to = row['end_time_of_monitoring']
    end_recording = corrected_datetime(
        row['end_date_of_monitoring'], recording_to, 'recording_times_difference')

    # Look up timezone
    tz = timezone(project_mapping[subject_id[:3]][1])

    print("Creating pixelgram for subject {}".format(subject_id))

    exposure_period = [tz.localize(start_exposure), tz.localize(end_exposure)]
    recording_period = [tz.localize(start_recording), tz.localize(end_recording)]
    plot_combined_pixelgram_dublin(
        subject_id,
        respeck_data,
        airspeck_data,
        exposure_period=exposure_period,
        recording_period=recording_period,
        overwrite_if_already_exists=overwrite_pixelgram_if_already_exists)
def download_data_and_plot_combined_pixelgram(
        subject_id,
        timeframe=None,
        filter_out_not_worn_respeck=True,
        overwrite_pixelgram_if_already_exists=False,
        subject_visit_number=None,
        overwrite_data_if_already_exists=False,
        upload_type='automatic'):
    """Download a subject's RESpeck and personal Airspeck data (if needed) and plot the combined pixelgram.

    The image is written to ``<plot dir of the subject's project>/<label>_combined_pixelgram.png`` where ``<label>``
    is ``subject_id`` or ``subject_id(visit)``. Nothing is downloaded or plotted when the image already exists and
    overwriting is not requested, and nothing is plotted when either recording is empty.

    Args:
        subject_id: Subject to process.
        timeframe: Optional pair ``(start, end)`` restricting both the download and the plotted data. When the
            start is timezone-naive, both ends are localised to the project's time zone.
        filter_out_not_worn_respeck: Forwarded to ``load_respeck_file`` as ``filter_out_not_worn``.
        overwrite_pixelgram_if_already_exists: Re-create the image even if the file exists.
        subject_visit_number: Optional visit number, forwarded to the download, load and plot helpers.
        overwrite_data_if_already_exists: Forwarded to the download helper as ``overwrite_if_already_exists``.
        upload_type: Forwarded to the download and load helpers.
    """
    project_name = get_project_for_subject(subject_id)
    plot_dir = project_mapping[project_name][3]

    if subject_visit_number is not None:
        label_files = "{}({})".format(subject_id, subject_visit_number)
    else:
        label_files = "{}".format(subject_id)

    pixelgram_filepath = plot_dir + "{}_combined_pixelgram.png".format(label_files)

    # Check if pixelgram already exists
    keep_existing = not overwrite_pixelgram_if_already_exists
    if keep_existing and os.path.isfile(pixelgram_filepath):
        print("Pixelgram for subject {} already exists. Skipping subject.".format(label_files))
        return

    # Download data if not present
    download_respeck_and_personal_airspeck_data(
        subject_id,
        upload_type=upload_type,
        timeframe=timeframe,
        overwrite_if_already_exists=overwrite_data_if_already_exists,
        subject_visit_number=subject_visit_number)

    # Load data and create plot
    respeck_data = load_respeck_file(
        subject_id,
        project_name=project_name,
        upload_type=upload_type,
        subject_visit_number=subject_visit_number,
        filter_out_not_worn=filter_out_not_worn_respeck)
    airspeck_data = load_personal_airspeck_file(
        subject_id,
        project_name=project_name,
        upload_type=upload_type,
        subject_visit_number=subject_visit_number)

    if len(respeck_data) == 0:
        print("RESpeck data for subject {} empty. Skipping subject.".format(label_files))
        return

    if len(airspeck_data) == 0:
        print("Airspeck data for subject {} empty. Skipping subject.".format(label_files))
        return

    # Restrict both recordings to the requested window before plotting.
    if timeframe is not None:
        tz = timezone(project_mapping[project_name][1])
        start_time = timeframe[0]
        end_time = timeframe[1]
        # Only the start is inspected; a naive start means both ends get localised.
        if start_time.tzinfo is None:
            start_time = tz.localize(start_time)
            end_time = tz.localize(end_time)
        respeck_data = respeck_data[start_time:end_time]
        airspeck_data = airspeck_data[start_time:end_time]

    plot_combined_pixelgram(
        subject_id,
        respeck_data,
        airspeck_data,
        pixelgram_filepath,
        overwrite_if_already_exists=overwrite_pixelgram_if_already_exists,
        subject_visit_number=subject_visit_number)