예제 #1
0
def load_static_airspeck_file(sid_or_uuid,
                              project_name=None,
                              sensor_label=None,
                              suffix_filename="",
                              upload_type='automatic',
                              subject_visit_number=None,
                              calibrate_pm=False,
                              calibrate_ox=False,
                              calibrate_no2=False,
                              use_all_features_for_pm_calibration=False,
                              use_all_features_for_gas_calibration=True,
                              return_calibration_flag=False,
                              calibration_id=None,
                              filename=None,
                              country_name=None):
    '''
    Load a static Airspeck csv file and optionally calibrate it.
    :param sid_or_uuid: Subject ID (6 characters) or sensor UUID. Only a 6-character value can be used to look up
    the project automatically.
    :param project_name: Key into project_mapping. Required when sid_or_uuid is not a 6-character subject ID.
    :param sensor_label: Prefix of the default filename. Defaults to sid_or_uuid, followed by "(visit number)" if
    subject_visit_number is given.
    :param suffix_filename: Text inserted right before ".csv" in the default filename.
    :param upload_type: Either 'automatic' or 'sd_card'. Only used to build the default filename.
    :param subject_visit_number: Recording number appended to the default sensor label.
    :param calibrate_pm: Passed to calibrate_airspeck. Calibration runs if any of the three calibrate_* flags is set.
    :param calibrate_ox: See calibrate_pm.
    :param calibrate_no2: See calibrate_pm.
    :param use_all_features_for_pm_calibration: Passed to calibrate_airspeck as use_all_features_pm.
    :param use_all_features_for_gas_calibration: Passed to calibrate_airspeck as use_all_features_gas.
    :param return_calibration_flag: If True AND a calibration was requested, return the full tuple
    (result_date, was_calibrated_pm, was_calibrated_no2, was_calibrated_ox, data). Ignored if no calibration
    was requested.
    :param calibration_id: Passed to calibrate_airspeck.
    :param filename: Name of the file to load. If None, "[label]_static_airspeck_[upload_type][suffix].csv".
    :param country_name: Passed to calibrate_airspeck.
    :return: The loaded data, or the tuple described under return_calibration_flag.
    :raises ValueError: If project_name is None and cannot be derived from sid_or_uuid.
    '''
    assert upload_type in [
        'automatic', 'sd_card'
    ], "upload_type has to be either 'automatic' or 'sd_card'"

    if project_name is None:
        if len(sid_or_uuid) == 6:
            project_name = get_project_for_subject(sid_or_uuid)
        else:
            # Without a project there is no directory to load from. Fail with a clear message
            # instead of an opaque lookup error on project_mapping[None].
            raise ValueError(
                "When passing a UUID and not a subject ID, also specify a project_name so that the "
                "correct directory can be selected")

    if sensor_label is None:
        if subject_visit_number is None:
            sensor_label = "{}".format(sid_or_uuid)
        else:
            sensor_label = "{}({})".format(sid_or_uuid, subject_visit_number)

    if filename is None:
        filename = "{}_static_airspeck_{}{}.csv".format(
            sensor_label, upload_type, suffix_filename)

    filepath = project_mapping[project_name][2] + filename
    print("Loading file: {}".format(filepath))
    data = load_airrespeck_file(filepath, project_name)

    if calibrate_pm or calibrate_ox or calibrate_no2:
        result_date, was_calibrated_pm, was_calibrated_no2, was_calibrated_ox, data = calibrate_airspeck(
            sid_or_uuid,
            data,
            calibrate_pm=calibrate_pm,
            calibrate_no2=calibrate_no2,
            calibrate_ox=calibrate_ox,
            project_name=project_name,
            calibration_id=calibration_id,
            use_all_features_pm=use_all_features_for_pm_calibration,
            use_all_features_gas=use_all_features_for_gas_calibration,
            country_name=country_name)

        if return_calibration_flag:
            return result_date, was_calibrated_pm, was_calibrated_no2, was_calibrated_ox, data

    return data
예제 #2
0
def load_respeck_file(subject_id,
                      project_name=None,
                      filter_out_not_worn=True,
                      subject_visit_number=None,
                      upload_type='automatic',
                      suffix_filename="",
                      raw_file=False):
    '''
    Read a Respeck csv file via load_airrespeck_file and optionally blank out unusable breathing rates.
    :param subject_id: Subject ID. Used as the filename prefix and, if project_name is None, to look up the project.
    :param project_name: Key into project_mapping. Looked up with get_project_for_subject if None.
    :param filter_out_not_worn: If True and the file contains rows, the breathing rate is set to NaN for periods of
    lying on the stomach and for periods where the sensor was not worn (done in place by the two helper functions).
    :param subject_visit_number: Recording number. If given, "(number)" is appended to the subject ID in the filename.
    :param upload_type: Part of the filename, e.g. 'automatic'.
    :param suffix_filename: Text inserted right before ".csv".
    :param raw_file: If True, load the "_raw" variant of the file.
    :return: Whatever load_airrespeck_file returns for the file (may be None).
    '''
    label_template = "{}" if subject_visit_number is None else "{}({})"
    if subject_visit_number is None:
        file_label = label_template.format(subject_id)
    else:
        file_label = label_template.format(subject_id,
                                           int(subject_visit_number))

    if project_name is None:
        project_name = get_project_for_subject(subject_id)

    name_template = ("{}_respeck_{}_raw{}.csv"
                     if raw_file else "{}_respeck_{}{}.csv")
    filename = name_template.format(file_label, upload_type, suffix_filename)

    print("Loading file: {}".format(project_mapping[project_name][2] +
                                    filename))
    filepath = project_mapping[project_name][2] + filename
    loaded = load_airrespeck_file(filepath, project_name)

    # Nothing to clean up if loading failed, filtering is disabled or the file is empty
    if loaded is None or not filter_out_not_worn or not len(loaded) > 0:
        return loaded

    set_breathing_rate_nan_when_lying_on_stomach(loaded)
    set_breathing_rate_nan_when_not_worn(loaded)
    return loaded
예제 #3
0
def download_respeck_data(subject_id,
                          upload_type='automatic',
                          is_minute_averaged=True,
                          timeframe=None,
                          overwrite_if_already_exists=False,
                          subject_visit_number=None,
                          suffix_filename="",
                          filename=None,
                          project_name=None,
                          out_directory=None):
    '''
    Download the Respeck data of a subject to a csv file, unless that file already exists.
    :param subject_id: Subject ID. Used for the download and as prefix of the default filename.
    :param upload_type: Either 'automatic' or 'manual'. 'automatic' requires is_minute_averaged=True.
    :param is_minute_averaged: If True, download minute data from the datastore. If False (manual uploads only),
    download the raw data from Google storage.
    :param timeframe: [start, end] datetimes. If None, a range from 2016 to 2100 is used.
    :param overwrite_if_already_exists: If False and the output file exists, nothing is downloaded.
    :param subject_visit_number: Recording number. If given, "(number)" is appended to the subject ID in the filename.
    :param suffix_filename: Text inserted right before ".csv" in the default filename.
    :param filename: Output filename. If None, "[label]_respeck_[upload_type][_raw][suffix].csv".
    :param project_name: Key into project_mapping. Looked up with get_project_for_subject if None.
    :param out_directory: Output directory. If None, project_mapping[project_name][2].
    :return: None
    '''
    assert upload_type in [
        'automatic', 'manual'
    ], "Upload type has to be either automatic or manual"

    # Compare by value: "is not" tests object identity and lets equal, non-interned strings slip through
    assert is_minute_averaged or upload_type != 'automatic', \
        "Only minute averaged data is automatically uploaded. Set is_minute_averaged=True."

    if project_name is None:
        project_name = get_project_for_subject(subject_id)

    if out_directory is None:
        out_directory = project_mapping[project_name][2]

    if subject_visit_number is None:
        label_files = "{}".format(subject_id)
    else:
        label_files = "{}({})".format(subject_id, int(subject_visit_number))

    if filename is None:
        if is_minute_averaged:
            filename = "{}_respeck_{}{}.csv".format(label_files, upload_type,
                                                    suffix_filename)
        else:
            filename = "{}_respeck_{}_raw{}.csv".format(
                label_files, upload_type, suffix_filename)

    if timeframe is None:
        # Set to a timeframe which will definitely include all data
        timeframe = [datetime(2016, 1, 1), datetime(2100, 1, 1)]

    # Use one path for both the existence check and the download, so that they cannot disagree
    # depending on whether out_directory ends with a separator
    out_filepath = os.path.join(out_directory, filename)

    if os.path.isfile(out_filepath) and not overwrite_if_already_exists:
        print("Data already downloaded")
        return

    if upload_type == 'automatic' or is_minute_averaged:
        # Minute data is stored in the datastore for both upload types
        download_respeck_minute_from_datastore(subject_id,
                                               out_filepath=out_filepath,
                                               timeframe=timeframe,
                                               project_name=project_name,
                                               upload_type=upload_type)
    else:
        download_raw_respeck_from_google_storage(
            subject_id,
            out_directory=out_directory,
            out_filename=filename,
            timeframe=timeframe,
            project_name=project_name,
            overwrite_file_if_existing=overwrite_if_already_exists,
            subject_visit_number=subject_visit_number)
    print('Done')
예제 #4
0
def download_from_google_storage(subject_id,
                                 prefix_storage_filename,
                                 timestamp_label,
                                 out_filename,
                                 out_directory=None,
                                 project_name=None,
                                 timeframe=None,
                                 force_download=False,
                                 store_raw=False):
    '''
    Download all daily files of one type for a subject from Google storage and merge them into one csv file.
    :param subject_id: Subject ID. Used as storage prefix ("AirRespeck/[subject_id]") and must occur in the filename.
    :param prefix_storage_filename: Text which must occur in the names of the files to download. The values
    "Airspeck", "GPSPhone" and "RESpeck" get special treatment (see below).
    :param timestamp_label: Name of the column holding the timestamps in milliseconds.
    :param out_filename: Name of the merged output file.
    :param out_directory: Output directory. If None, project_mapping[project_name][2]. Downloaded files are cached in
    a "temp" subdirectory.
    :param project_name: Key into project_mapping. Looked up with get_project_for_subject if None.
    :param timeframe: [start, end] datetimes. Naive datetimes are localised to the project timezone.
    If None, a range from 2016 to 2100 is used.
    :param force_download: If False and the output file exists, nothing is downloaded.
    :param store_raw: If False, "Airspeck" and "GPSPhone" data is averaged per minute. Otherwise the data is only
    sorted by timestamp.
    :return: None. The output file is only written if at least one row was loaded.
    '''
    if project_name is None:
        project_name = get_project_for_subject(subject_id)

    if out_directory is None:
        out_directory = project_mapping[project_name][2]

    # Use one path for both the existence check and the final write, so that they cannot disagree
    # depending on whether out_directory ends with a separator
    out_filepath = os.path.join(out_directory, out_filename)
    temp_directory = os.path.join(out_directory, "temp")

    if os.path.isfile(out_filepath) and not force_download:
        print("Data already downloaded")
        return

    # Did user pass timeframe? If not, load all data
    if timeframe is None:
        timeframe = [datetime(2016, 1, 1), datetime(2100, 1, 1)]

    # Select the timeframe, after accounting for timezone difference
    tz = timezone(project_mapping[project_name][1])

    if timeframe[0].tzinfo is None:
        localised_start = tz.localize(timeframe[0])
        localised_end = tz.localize(timeframe[1])
    else:
        localised_start = timeframe[0]
        localised_end = timeframe[1]

    # The date in the filename has no time of day, so compare against the beginning of the first day
    first_day = localised_start.replace(hour=0, minute=0, second=0)

    # Collect the daily frames and concatenate once. Appending to a DataFrame in the loop copies all
    # previous rows each time, and DataFrame.append was removed in pandas 2.0.
    frames = []
    storage_client = storage.Client('specknet-pyramid-test')
    bucket = storage_client.get_bucket(project_mapping[project_name][0])

    for blob in bucket.list_blobs(prefix='AirRespeck/{}'.format(subject_id)):
        filename = blob.name.split("/")[-1]
        if subject_id not in filename or prefix_storage_filename not in filename:
            continue

        # The ten characters before the file extension are the date of the file
        date_of_file = tz.localize(
            datetime.strptime(filename[-14:-4], "%Y-%m-%d"))
        # Skip file if it's not in the timeframe we're interested in!
        if date_of_file < first_day or date_of_file > localised_end:
            continue

        temp_file = os.path.join(temp_directory, filename)

        # Create temp directory if it doesn't exist yet
        if not os.path.exists(temp_directory):
            os.makedirs(temp_directory)

        if not os.path.isfile(temp_file):
            blob.download_to_filename(temp_file)

        # Only peek at the first line and close the file again, so that it is not open
        # while being overwritten with the decrypted version
        with open(temp_file) as file:
            is_encrypted = file.readline().strip() == "Encrypted"

        if is_encrypted:
            # Decrypt file before continuing
            print("File is being decrypted")
            decrypt_file(temp_file, temp_file)
        else:
            # Try converting all dates. If this fails, some lines are probably encrypted
            # NOTE(review): error_bad_lines was replaced by on_bad_lines='skip' in newer pandas versions
            # and removed in pandas 2.0. Kept for the pandas version this file was written for.
            temp_data = pd.read_csv(temp_file, error_bad_lines=False)
            try:
                pd.to_datetime(temp_data[timestamp_label],
                               unit='ms',
                               exact=False)
            except Exception:
                partly_decrypt_file(temp_file, temp_file)

        frames.append(pd.read_csv(temp_file, error_bad_lines=False))

    data = pd.concat(frames) if frames else pd.DataFrame()

    if len(data) > 0:
        data[timestamp_label] = pd.to_datetime(data[timestamp_label],
                                               unit='ms',
                                               exact=False)

        # Calculate minute averages
        if prefix_storage_filename in ["Airspeck", "GPSPhone"
                                       ] and not store_raw:
            data = data.groupby(data[timestamp_label].apply(
                lambda d: d.replace(second=0, microsecond=0))).mean()
        else:
            # Don't minute average here, but simply set the timestamp column as index
            data = data.set_index(data[timestamp_label]).sort_index()
            # Delete the original column
            data = data.drop(timestamp_label, axis=1)

        # Re-insert (copy) timestamp column from index, so that it is saved
        if 'timestamp' not in data.columns:
            data.insert(0, 'timestamp', data.index)

        # Remove NaTs from index
        data = data.loc[data.index.notnull()]

        # If we are downloading Respeck data, remove Respeck timestamps and sequence number
        if prefix_storage_filename == "RESpeck":
            data = data.drop(['respeckTimestamp', 'sequenceNumber'], axis=1)

        # The index is timezone-naive, so slice with the naive UTC equivalents of the timeframe
        data = data[localised_start.astimezone(timezone('UTC')).replace(
            tzinfo=None):localised_end.astimezone(timezone('UTC')).replace(
                tzinfo=None)]

        data.to_csv(out_filepath, index=False)
예제 #5
0
def download_static_airspeck(subj_or_uuid,
                             sensor_label=None,
                             project_name=None,
                             overwrite_if_already_exists=False,
                             timeframe=None,
                             upload_type='automatic',
                             suffix_filename="",
                             filename=None,
                             subject_visit_number=None,
                             out_directory=None):
    '''
    Query the datastore for static Airspeck data and write it to a csv file, unless that file already exists.
    :param subj_or_uuid: Subject ID (6 characters) or sensor UUID (16 characters).
    :param sensor_label: Prefix of the default filename. Defaults to subj_or_uuid, followed by "(visit number)" for
    subject IDs with a subject_visit_number.
    :param project_name: Key into project_mapping. Can only be looked up automatically for 6-character subject IDs.
    :param overwrite_if_already_exists: If False and the output file exists, nothing is downloaded.
    :param timeframe: [start, end] datetimes. Naive datetimes are interpreted in the project timezone and converted
    to naive UTC. Timezone-aware datetimes are passed to the query unchanged. If None, 2016 to 2100 is used.
    :param upload_type: Either 'automatic' or 'sd_card'. Selects the datastore kind and the location fields.
    :param suffix_filename: Text inserted right before ".csv" in the default filename.
    :param filename: Output filename. If None, "[label]_static_airspeck_[upload_type][suffix].csv".
    :param subject_visit_number: Recording number used in the default sensor label.
    :param out_directory: Output directory. If None, project_mapping[project_name][2].
    :return: None
    :raises ValueError: If project_name is None and subj_or_uuid is not a 6-character subject ID.
    '''
    assert upload_type in [
        'automatic', 'sd_card'
    ], "upload_type has to be either 'automatic' or 'sd_card'"

    is_subject_id = len(subj_or_uuid) == 6

    if project_name is None:
        if not is_subject_id:
            raise ValueError(
                "When passing a UUID and not a subject ID, also specify a project_name so that the "
                "correct directory can be selected")
        project_name = get_project_for_subject(subj_or_uuid)

    if out_directory is None:
        out_directory = project_mapping[project_name][2]

    if sensor_label is None:
        sensor_label = subj_or_uuid
        if is_subject_id and subject_visit_number is not None:
            sensor_label = "{}({})".format(subj_or_uuid, subject_visit_number)

    if filename is None:
        filename = "{}_static_airspeck_{}{}.csv".format(
            sensor_label, upload_type, suffix_filename)

    target_path = out_directory + filename

    if os.path.isfile(target_path) and not overwrite_if_already_exists:
        print('Skipping file as it already exists')
        return

    client = get_datastore_client()

    # Fields which are stored under the same name for both upload types, in column order
    shared_fields = ['temperature', 'humidity', 'battery', 'no2_ae', 'no2_we',
                     'ox_ae', 'ox_we']

    with open(target_path, "w") as csv_file:

        csv_file.write(
            "timestamp,pm1,pm2_5,pm10,bin0,bin1,bin2,bin3,bin4,bin5,bin6,bin7,bin8,bin9,bin10,bin11,bin12,"
            "bin13,bin14,bin15,temperature,humidity,battery,no2_ae,no2_we,ox_ae,ox_we,"
            "gpsLatitude,gpsLongitude\n")

        # Without a timeframe, use a range which covers all data
        if timeframe is None:
            timeframe = [datetime(2016, 1, 1), datetime(2100, 1, 1)]

        tz = timezone(project_mapping[project_name][1])

        utc_start, utc_end = timeframe[0], timeframe[1]
        if timeframe[0].tzinfo is None:
            utc_start = tz.localize(utc_start).astimezone(
                timezone('UTC')).replace(tzinfo=None)
            utc_end = tz.localize(utc_end).astimezone(
                timezone('UTC')).replace(tzinfo=None)

        # The two upload types are stored as different kinds, which name the UUID field differently
        is_automatic = upload_type == 'automatic'
        if is_automatic:
            kind_name, uuid_field = 'StaticAirspeck', 'uuid'
        else:
            kind_name, uuid_field = 'StaticAirspeckSDCard', 'airspeck_uuid'
        id_name = uuid_field if len(subj_or_uuid) == 16 else 'subject_id'

        entities = client.query(kind=kind_name,
                                filters=[(id_name, '=', subj_or_uuid),
                                         ('timestamp', '>=', utc_start),
                                         ('timestamp', '<', utc_end)],
                                order=['timestamp']).fetch()

        for entity in entities:
            csv_file.write("{},{},{},{},".format(
                entity['timestamp'].replace(tzinfo=None), entity['pm1'],
                entity['pm2_5'], entity['pm10']))

            for bin_number in range(16):
                csv_file.write("{},".format(entity['bins'][bin_number]))

            row_tail = [entity[field] for field in shared_fields]
            if is_automatic:
                # Automatic uploads store the position as a nested entry
                row_tail.append(entity['location']['latitude'])
                row_tail.append(entity['location']['longitude'])
            else:
                row_tail.append(entity['latitude'])
                row_tail.append(entity['longitude'])
            csv_file.write("{},{},{},{},{},{},{},{},{}\n".format(*row_tail))

    print('Done')
예제 #6
0
def load_personal_airspeck_file(subject_id,
                                project_name=None,
                                upload_type='automatic',
                                is_minute_averaged=True,
                                subject_visit_number=None,
                                suffix_filename="",
                                calibrate_pm_and_gas=False,
                                use_all_features_for_pm_calibration=False,
                                use_all_features_for_gas_calibration=False,
                                suppress_output=False,
                                set_below_zero_to=np.nan,
                                return_calibration_flag=False,
                                calibration_id=None,
                                filter_pm=True,
                                country_name=None):
    '''
    Read a personal Airspeck csv file via load_airrespeck_file, optionally calibrate and clean it.
    :param subject_id: Subject ID. Used as the filename prefix and, if project_name is None, to look up the project.
    :param project_name: Key into project_mapping. Looked up with get_project_for_subject if None.
    :param upload_type: Part of the filename, e.g. 'automatic'.
    :param is_minute_averaged: If False, the "_raw" variant of the file is loaded. Only affects the filename.
    :param subject_visit_number: Recording number. If given, "(number)" is appended to the subject ID in the filename.
    :param suffix_filename: Text inserted right before ".csv".
    :param calibrate_pm_and_gas: If True, call calibrate_airspeck. Note that the call only requests the PM
    calibration (calibrate_no2 and calibrate_ox are passed as False).
    :param use_all_features_for_pm_calibration: Passed to calibrate_airspeck as use_all_features_pm.
    :param use_all_features_for_gas_calibration: Passed to calibrate_airspeck as use_all_features_gas.
    :param suppress_output: If True, don't print how many PM2.5 values were replaced.
    :param set_below_zero_to: Replacement for PM2.5 values equal to or below zero (default np.nan).
    :param return_calibration_flag: If True AND calibrate_pm_and_gas is True, return
    (result_date, was_calibrated_pm, data) instead of just the data.
    :param calibration_id: Passed to calibrate_airspeck.
    :param filter_pm: If True and the file contains rows, replace PM2.5 values <= 0 and set humidity values
    above 105 to NaN.
    :param country_name: Passed to calibrate_airspeck.
    :return: The loaded data, or the tuple described under return_calibration_flag.
    '''
    file_label = (subject_id if subject_visit_number is None else
                  "{}({:.0f})".format(subject_id, int(subject_visit_number)))

    if project_name is None:
        project_name = get_project_for_subject(subject_id)

    name_template = ("{}_airspeck_personal_{}{}.csv" if is_minute_averaged
                     else "{}_airspeck_personal_{}_raw{}.csv")
    filename = name_template.format(file_label, upload_type, suffix_filename)

    print("Loading file: {}".format(project_mapping[project_name][2] +
                                    filename))
    filepath = project_mapping[project_name][2] + filename
    data = load_airrespeck_file(filepath, project_name)

    if calibrate_pm_and_gas:
        # Only the date and the PM flag of the calibration result are used further down
        calibration_result = calibrate_airspeck(
            subject_id,
            data,
            project_name=project_name,
            calibrate_pm=True,
            calibrate_no2=False,
            calibrate_ox=False,
            calibration_id=calibration_id,
            use_all_features_pm=use_all_features_for_pm_calibration,
            use_all_features_gas=use_all_features_for_gas_calibration,
            country_name=country_name)
        result_date, was_calibrated_pm, _no2_flag, _ox_flag, data = calibration_result

    if filter_pm and data is not None and len(data) > 0:
        invalid_pm = data['pm2_5'] <= 0
        num_invalid = np.count_nonzero(invalid_pm)

        if num_invalid:
            if not suppress_output:
                print("Setting {} values equal to or below 0 to {}".format(
                    num_invalid, set_below_zero_to))
            data.loc[invalid_pm, 'pm2_5'] = set_below_zero_to

        # Valid humidity readings sometimes exceed 100, but anything above 105 is treated as invalid
        data.loc[data['humidity'] > 105, 'humidity'] = np.nan

    if not (calibrate_pm_and_gas and return_calibration_flag):
        return data
    return result_date, was_calibrated_pm, data
예제 #7
0
def download_respeck_data_and_plot_pixelgram(
        subject_id,
        project_name=None,
        upload_type='automatic',
        timeframe=None,
        overwrite_pixelgram_if_already_exists=False,
        filter_out_not_worn=True,
        overwrite_data_if_already_exists=False,
        subject_visit_number=None):
    '''
    Download the Respeck data of a subject (if not present yet) and plot it as a pixelgram.
    :param subject_id: Subject ID.
    :param project_name: Key into project_mapping. Looked up with get_project_for_subject if None.
    :param upload_type: Passed to download_respeck_data and load_respeck_file.
    :param timeframe: [start, end] datetimes. Limits both the download and the plotted data. Naive datetimes are
    localised to the project timezone for plotting.
    :param overwrite_pixelgram_if_already_exists: If False and the pixelgram file exists, nothing is done.
    :param filter_out_not_worn: Passed to load_respeck_file.
    :param overwrite_data_if_already_exists: Passed to download_respeck_data.
    :param subject_visit_number: Recording number. If given, "(number)" is appended to the subject ID in filenames.
    :return: None
    '''
    if project_name is None:
        project_name = get_project_for_subject(subject_id)

    plot_dir = project_mapping[project_name][3]

    # Only add the visit number if there is one. Otherwise, the file would be called "[ID](None)_..."
    if subject_visit_number is None:
        label_files = "{}".format(subject_id)
    else:
        label_files = "{}({})".format(subject_id, subject_visit_number)

    pixelgram_filepath = plot_dir + "{}_respeck_pixelgram.png".format(
        label_files)

    # Check if pixelgram already exists
    if not overwrite_pixelgram_if_already_exists and os.path.isfile(
            pixelgram_filepath):
        print("Pixelgram for subject {} already exists. Skipping subject.".
              format(label_files))
        return

    # Download files if they weren't there yet before. Pass the project on, so that
    # download and load use the same directory
    download_respeck_data(
        subject_id,
        upload_type=upload_type,
        timeframe=timeframe,
        overwrite_if_already_exists=overwrite_data_if_already_exists,
        subject_visit_number=subject_visit_number,
        project_name=project_name)

    respeck_data = load_respeck_file(subject_id,
                                     project_name,
                                     subject_visit_number=subject_visit_number,
                                     upload_type=upload_type,
                                     filter_out_not_worn=filter_out_not_worn)

    # load_respeck_file explicitly allows for the loaded data being None
    if respeck_data is None or len(respeck_data) == 0:
        print(
            "File for subject {} empty. Skipping subject.".format(subject_id))
        return

    if timeframe is not None:
        tz = timezone(project_mapping[project_name][1])

        if timeframe[0].tzinfo is None:
            start_time = tz.localize(timeframe[0])
            end_time = tz.localize(timeframe[1])
        else:
            start_time = timeframe[0]
            end_time = timeframe[1]
        respeck_data = respeck_data[start_time:end_time]

    plot_respeck_pixelgram(
        subject_id,
        respeck_data,
        pixelgram_filepath,
        overwrite_if_already_exists=overwrite_pixelgram_if_already_exists,
        subject_visit_number=subject_visit_number)
예제 #8
0
def download_data_and_plot_combined_pixelgram(
        subject_id,
        timeframe=None,
        filter_out_not_worn_respeck=True,
        overwrite_pixelgram_if_already_exists=False,
        subject_visit_number=None,
        overwrite_data_if_already_exists=False,
        upload_type='automatic'):
    '''
    Download the Respeck and personal Airspeck data of a subject (if not present yet) and plot both in one pixelgram.
    :param subject_id: Subject ID. The project is looked up with get_project_for_subject.
    :param timeframe: [start, end] datetimes. Limits both the download and the plotted data. Naive datetimes are
    localised to the project timezone for plotting.
    :param filter_out_not_worn_respeck: Passed to load_respeck_file as filter_out_not_worn.
    :param overwrite_pixelgram_if_already_exists: If False and the pixelgram file exists, nothing is done.
    :param subject_visit_number: Recording number. If given, "(number)" is appended to the subject ID in filenames.
    :param overwrite_data_if_already_exists: Passed to download_respeck_and_personal_airspeck_data.
    :param upload_type: Passed to the download and load functions.
    :return: None
    '''
    project_name = get_project_for_subject(subject_id)
    plot_dir = project_mapping[project_name][3]

    if subject_visit_number is None:
        label_files = "{}".format(subject_id)
    else:
        label_files = "{}({})".format(subject_id, subject_visit_number)

    pixelgram_filepath = plot_dir + "{}_combined_pixelgram.png".format(
        label_files)

    # Check if pixelgram already exists
    if not overwrite_pixelgram_if_already_exists and os.path.isfile(
            pixelgram_filepath):
        print("Pixelgram for subject {} already exists. Skipping subject.".
              format(label_files))
        return

    # Download data if not present
    download_respeck_and_personal_airspeck_data(
        subject_id,
        upload_type=upload_type,
        timeframe=timeframe,
        overwrite_if_already_exists=overwrite_data_if_already_exists,
        subject_visit_number=subject_visit_number)

    # Load data and create plot
    respeck_data = load_respeck_file(
        subject_id,
        project_name=project_name,
        upload_type=upload_type,
        subject_visit_number=subject_visit_number,
        filter_out_not_worn=filter_out_not_worn_respeck)
    airspeck_data = load_personal_airspeck_file(
        subject_id,
        project_name=project_name,
        upload_type=upload_type,
        subject_visit_number=subject_visit_number)

    # Both load functions explicitly allow for the loaded data being None, so treat None like an empty file
    if respeck_data is None or len(respeck_data) == 0:
        print("RESpeck data for subject {} empty. Skipping subject.".format(
            label_files))
        return

    if airspeck_data is None or len(airspeck_data) == 0:
        print("Airspeck data for subject {} empty. Skipping subject.".format(
            label_files))
        return

    if timeframe is not None:
        tz = timezone(project_mapping[project_name][1])

        if timeframe[0].tzinfo is None:
            start_time = tz.localize(timeframe[0])
            end_time = tz.localize(timeframe[1])
        else:
            start_time = timeframe[0]
            end_time = timeframe[1]

        respeck_data = respeck_data[start_time:end_time]
        airspeck_data = airspeck_data[start_time:end_time]

    plot_combined_pixelgram(
        subject_id,
        respeck_data,
        airspeck_data,
        pixelgram_filepath,
        overwrite_if_already_exists=overwrite_pixelgram_if_already_exists,
        subject_visit_number=subject_visit_number)
예제 #9
0
def plot_respeck_pixelgram(subject_id,
                           respeck_data,
                           plot_filepath=None,
                           overwrite_if_already_exists=False,
                           subject_visit_number=None):
    '''
    Plot breathing rate, activity type, activity level and step count of a Respeck recording as a pixelgram.
    :param subject_id: Subject ID. Only used to build the default plot_filepath.
    :param respeck_data: Respeck data indexed by timestamp, with the columns 'activity_level', 'breathing_rate',
    'step_count' and 'activity_type'. Nothing is plotted if None or empty.
    :param plot_filepath: Where to save the image. If None, "[ID]_respeck_pixelgram.png" (with "(visit number)"
    after the ID if given) in project_mapping[project][3].
    :param overwrite_if_already_exists: If False and plot_filepath exists, nothing is plotted.
    :param subject_visit_number: Recording number used in the default plot_filepath.
    :return: None
    '''
    if respeck_data is None or len(respeck_data) == 0:
        print("Skipping Pixelgram as no data was passed")
        return

    if plot_filepath is None:
        directory = project_mapping[get_project_for_subject(subject_id)][3]
        if subject_visit_number is not None:
            plot_filepath = directory + "{}({})_respeck_pixelgram.png".format(
                subject_id, subject_visit_number)
        else:
            plot_filepath = directory + "{}_respeck_pixelgram.png".format(
                subject_id)

    # Stop here if the file was already created
    if os.path.exists(plot_filepath) and not overwrite_if_already_exists:
        print("Pixelgram already exists.")
        return

    sns.reset_orig()
    # Hour offset so that the legend can be displayed inside graph.
    hours_offset = 9
    hours, num_days, time_grid = prepare_grid(respeck_data,
                                              hours_offset=hours_offset)

    # Scale each signal to the range shown in its column title
    scaled_activity = normalise_into_range(respeck_data['activity_level'], 0,
                                           0.7)
    scaled_breathing = normalise_into_range(respeck_data['breathing_rate'], 10,
                                            40)
    scaled_steps = normalise_into_range(respeck_data['step_count'], 0, 120.)

    # One cell per minute. Cells without data stay zero
    br_grid = np.zeros_like(time_grid, dtype=float)
    actlevel_grid = np.zeros_like(time_grid, dtype=float)
    stepcount_grid = np.zeros_like(time_grid, dtype=float)
    acttype_grid = np.zeros_like(time_grid, dtype=int)

    grid_start = time_grid[0][0]
    activity_types = respeck_data['activity_type']

    for row_number, row_time in enumerate(respeck_data.index):
        minutes_since_start = int(
            (row_time - grid_start).total_seconds() / 60.)
        # Grid position: (hour since start of grid, minute within that hour)
        cell = (int(minutes_since_start / 60), int(minutes_since_start % 60))

        br_grid[cell] = scaled_breathing[row_number]
        actlevel_grid[cell] = scaled_activity[row_number]
        acttype_grid[cell] = activity_types[row_number] + 1
        stepcount_grid[cell] = scaled_steps[row_number]

    # Plot
    fig, axes = plt.subplots(1, 4)
    figure_height = (num_days + hours_offset / 24.) * 4.5
    fig.set_size_inches((20, figure_height))

    prepare_axes(axes, hours_offset, hours, num_days, time_grid)

    plot_column(axes[0], br_grid, "Breathing rate (10-40 BrPM)")
    plot_column(axes[1], acttype_grid, is_activity_type=True)
    plot_column(axes[2], actlevel_grid, "Activity level (0-0.7)")
    plot_column(axes[3], stepcount_grid, "Step counts (0-120 steps)")

    plt.subplots_adjust(wspace=0.2)
    plt.tight_layout()
    plt.savefig(plot_filepath, dpi=300)
예제 #10
0
def plot_combined_pixelgram(subject_id,
                            respeck_data,
                            airspeck_data,
                            plot_filepath=None,
                            overwrite_if_already_exists=False,
                            subject_visit_number=None):
    """
    Plot Respeck and personal Airspeck data of one subject side by side as a pixelgram.

    airspeck_data and respeck_data should be minute average pandas dataframes, with the timestamp as index
    :param subject_id: Subject ID. Only used to build the default plot_filepath.
    :param respeck_data: Needs the columns 'timestamp', 'activity_level', 'breathing_rate' and 'activity_type'.
    :param airspeck_data: Needs the columns 'timestamp', 'pm2_5', 'luxLevel', 'gpsLatitude' and 'gpsLongitude'.
    A 'speed' column is added to it by add_speed_to_gps_data.
    :param plot_filepath: Where to save the image. If None, "[ID]_combined_pixelgram.png" (with "(visit number)"
    after the ID if given) in project_mapping[project][3].
    :param overwrite_if_already_exists: If False and plot_filepath exists, nothing is plotted.
    :param subject_visit_number: Recording number used in the default plot_filepath.
    :return: None
    """

    # NOTE(review): this only skips the plot if BOTH inputs are missing. The code below accesses
    # columns of both dataframes, so passing None for just one of them will fail - confirm intent.
    if (respeck_data is None or len(respeck_data)
            == 0) and (airspeck_data is None or len(airspeck_data) == 0):
        print("Skipping Pixelgram as no data was passed")
        return

    if plot_filepath is None:
        directory = project_mapping[get_project_for_subject(subject_id)][3]
        if subject_visit_number is None:
            plot_filepath = directory + "{}_combined_pixelgram.png".format(
                subject_id)
        else:
            plot_filepath = directory + "{}({})_combined_pixelgram.png".format(
                subject_id, subject_visit_number)

    if not overwrite_if_already_exists and os.path.exists(plot_filepath):
        print("Pixelgram already exists.")
        return  # The file was already created, so stop execution of this function

    sns.reset_orig()
    # Hour offset passed to prepare_grid and prepare_axes (same value as in the Respeck-only pixelgram)
    hours_offset = 9
    hours, num_days, time_grid = prepare_grid(respeck_data, airspeck_data,
                                              hours_offset)

    # Calculate GPS speed
    add_speed_to_gps_data(airspeck_data, 'gpsLatitude', 'gpsLongitude')

    # Normalise data
    norm_GPS = normalise_into_range(airspeck_data['speed'], 0, 20)
    norm_lux = normalise_into_range(airspeck_data['luxLevel'], 0, 4)

    # Scale PM2.5 to its 95th percentile, but use at least a range of 0-50
    pm_95_percentile = np.nanpercentile(airspeck_data['pm2_5'], 95)
    pm_max = pm_95_percentile if pm_95_percentile > 50 else 50
    norm_pm2_5 = normalise_into_range(airspeck_data['pm2_5'], 0, pm_max)

    norm_act = normalise_into_range(respeck_data['activity_level'], 0, 0.7)
    norm_br = normalise_into_range(respeck_data['breathing_rate'], 10, 40)

    # Create grids. One cell per minute, cells without data stay zero
    pm25_grid = np.zeros_like(time_grid, dtype=float)
    lux_grid = np.zeros_like(time_grid, dtype=float)
    gps_grid = np.zeros_like(time_grid, dtype=float)
    actlevel_grid = np.zeros_like(time_grid, dtype=float)
    br_grid = np.zeros_like(time_grid, dtype=float)
    acttype_grid = np.zeros_like(time_grid, dtype=int)

    # Fill in RESpeck data
    for idx in range(len(respeck_data['timestamp'])):
        time_diff_minutes = int(
            (respeck_data['timestamp'][idx] - time_grid[0][0]).total_seconds()
            / 60.)
        # Grid position: (hour since start of grid, minute within that hour)
        idx2d = (int(time_diff_minutes / 60), int(time_diff_minutes % 60))

        br_grid[idx2d] = norm_br[idx]
        actlevel_grid[idx2d] = norm_act[idx]
        # Only show an activity type where there is an activity level
        if not np.isnan(norm_act[idx]):
            acttype_grid[idx2d] = respeck_data['activity_type'][idx] + 1

    # Same for Airspeck
    for idx in range(len(airspeck_data['timestamp'])):
        time_diff_minutes = int((airspeck_data['timestamp'][idx] -
                                 time_grid[0][0]).total_seconds() / 60.)
        idx2d = (int(time_diff_minutes / 60), int(time_diff_minutes % 60))

        lux_grid[idx2d] = norm_lux[idx]
        gps_grid[idx2d] = norm_GPS[idx]
        pm25_grid[idx2d] = norm_pm2_5[idx]

    fig, axes = plt.subplots(1, 6)
    fig.set_size_inches((30, num_days * 4.5))

    prepare_axes(axes, hours_offset, hours, num_days, time_grid)

    plot_column(axes[0], lux_grid, "Lux level (0-4)")
    # The title has to match the range used for norm_br above
    plot_column(axes[1], br_grid, "Breathing rate (10-40 BrPM)")
    plot_column(axes[2], pm25_grid,
                "PM 2.5, in ug/m3 (0 - {})".format(int(pm_max)))
    plot_column(axes[3], acttype_grid, is_activity_type=True)
    plot_column(axes[4], actlevel_grid, "Activity level (0-0.7)")
    plot_column(axes[5], gps_grid, "Speed (0-20 km/h)")

    plt.subplots_adjust(wspace=0.2)
    plt.tight_layout()
    plt.savefig(plot_filepath, dpi=300)
예제 #11
0
def plot_airspeck_pixelgram(subject_id,
                            airspeck_data,
                            plot_filepath=None,
                            overwrite_if_already_exists=False,
                            subject_visit_number=None):
    """Draw a six-column pixelgram of Airspeck data and save it as an image.

    Each recording is mapped to a grid cell (hour offset, minute of hour)
    measured from ``time_grid[0][0]`` as returned by ``prepare_grid``. The
    columns show: recording presence, PM 2.5, lux level, speed, GPS accuracy
    and motion, each normalised with ``normalise_into_range``.

    Args:
        subject_id: Subject identifier; used to look up the project's plot
            directory and to build the default file name.
        airspeck_data: Data container indexable by column name, with at
            least 'timestamp', 'pm2_5', 'luxLevel', 'gpsLatitude',
            'gpsLongitude', 'gpsAccuracy' and 'motion'. Nothing is plotted
            when it is None or empty.
        plot_filepath: Target image path. When None, a path is built from
            entry 3 of the subject's ``project_mapping`` record plus the
            subject id (and visit number, if given).
        overwrite_if_already_exists: When False, an existing file at the
            target path is left alone and the function returns early.
        subject_visit_number: Optional visit number, appended in parentheses
            to the subject id in the default file name.

    Returns:
        None. The figure is written to ``plot_filepath``.
    """
    no_data = airspeck_data is None or len(airspeck_data) == 0
    if no_data:
        print("Skipping Pixelgram as no data was passed")
        return

    # Fall back to the project's plot directory when no path was supplied.
    if plot_filepath is None:
        directory = project_mapping[get_project_for_subject(subject_id)][3]
        if subject_visit_number is not None:
            plot_filepath = directory + "{}({})_airspeck_pixelgram.png".format(
                subject_id, subject_visit_number)
        else:
            plot_filepath = directory + "{}_airspeck_pixelgram.png".format(
                subject_id)

    # Only touch the file system when overwriting is not allowed.
    if not overwrite_if_already_exists:
        if os.path.exists(plot_filepath):
            print("Pixelgram already exists.")
            return  # Nothing to do: the image was created earlier

    sns.reset_orig()
    hours, num_days, time_grid = prepare_grid(airspeck_data)

    # The PM 2.5 colour scale tops out at the 95th percentile, but never
    # below 50.
    pm_95_percentile = np.nanpercentile(airspeck_data['pm2_5'], 95)
    pm_max = 50
    if pm_95_percentile > 50:
        pm_max = pm_95_percentile

    pm_values = normalise_into_range(airspeck_data['pm2_5'], 0, pm_max)
    lux_values = normalise_into_range(airspeck_data['luxLevel'], 0, 3)

    # Must run before 'speed' is read below; presumably this adds a 'speed'
    # column to airspeck_data in place -- confirm against the helper.
    add_speed_to_gps_data(airspeck_data, 'gpsLatitude', 'gpsLongitude')
    speed_values = normalise_into_range(airspeck_data['speed'], 0, 20)
    accuracy_values = normalise_into_range(airspeck_data['gpsAccuracy'], 0,
                                           100)
    motion_values = normalise_into_range(airspeck_data['motion'], 0, 500)

    # One zero-initialised float grid per plotted column.
    (pm25_grid, lux_grid, recording_grid, gps_grid, gps_acc_grid,
     motion_grid) = (np.zeros_like(time_grid, dtype=float) for _ in range(6))

    timestamps = airspeck_data['timestamp']
    grid_start = time_grid[0][0]
    for row in range(len(timestamps)):
        elapsed = timestamps[row] - grid_start
        minutes_since_start = int(elapsed.total_seconds() / 60.)
        # Cell = (whole hours since grid start, minute within that hour)
        cell = (int(minutes_since_start / 60), int(minutes_since_start % 60))

        pm25_grid[cell] = pm_values[row]
        lux_grid[cell] = lux_values[row]
        recording_grid[cell] = 1
        gps_grid[cell] = speed_values[row]
        gps_acc_grid[cell] = accuracy_values[row]
        motion_grid[cell] = motion_values[row]

    # Plot
    fig, axes = plt.subplots(1, 6)
    fig.set_size_inches((30, num_days * 4.5))

    prepare_axes(axes, 0, hours, num_days, time_grid)

    # Columns in left-to-right order, each paired with its caption.
    columns = (
        (recording_grid, "Recording present (dark = yes)"),
        (pm25_grid, "PM 2.5, in ug/m3 (0 - {})".format(int(pm_max))),
        (lux_grid, "Lux level (0-4)"),
        (gps_grid, "Speed (0-20 km/h)"),
        (gps_acc_grid,
         "Accuracy of GPS in meters.\nDark = low accuracy = probably inside."),
        (motion_grid, "Level of activity. The darker, the more activity."),
    )
    for axis, (grid, caption) in zip(axes, columns):
        plot_column(axis, grid, caption)

    plt.subplots_adjust(wspace=0.2)
    plt.tight_layout()
    plt.savefig(plot_filepath, dpi=300)