def test_restrict_timeframe():

    # File contains 24 hours of 1s, then 15 hours of 0s, then 9 hours of 1s, then 24 hours of 1s

    start = counts.timestamps[0]
    end = counts.timestamps[-1]

    # Summarise the data before restricting
    summary_before = Time_Series.Time_Series("")
    summary_before.add_channels(
        counts.summary_statistics(statistics=[("generic", ["sum", "n", "missing"])]))

    # Number of 1s = 24 hours then 9 hours then 24 hours
    assert summary_before.get_channel("AG_Counts_sum").data[0] == (24 + 9 + 24) * 60

    # Trim an hour off the start
    counts.restrict_timeframe(start + timedelta(hours=1), end)
    summary_after = Time_Series.Time_Series("")
    summary_after.add_channels(
        counts.summary_statistics(statistics=[("generic", ["sum", "n", "missing"])]))

    # Should be 1 hour less
    assert summary_after.get_channel("AG_Counts_sum").data[0] == (23 + 9 + 24) * 60

    # Repeating exactly the same thing should change nothing
    counts.restrict_timeframe(start + timedelta(hours=1), end)
    summary_after = Time_Series.Time_Series("")
    summary_after.add_channels(
        counts.summary_statistics(statistics=[("generic", ["sum", "n", "missing"])]))
    assert summary_after.get_channel("AG_Counts_sum").data[0] == (23 + 9 + 24) * 60

    # Now trim an hour off the end
    counts.restrict_timeframe(start + timedelta(hours=1),
                              end - timedelta(hours=1))
    summary_after = Time_Series.Time_Series("")
    summary_after.add_channels(
        counts.summary_statistics(statistics=[("generic", ["sum", "n", "missing"])]))

    # Should now be an hour less again
    assert summary_after.get_channel("AG_Counts_sum").data[0] == (23 + 9 + 23) * 60

    # Now trim to a single hour (timestamps are inclusive, so 12:00-12:59
    # spans 60 one-minute epochs)
    counts.restrict_timeframe(start + timedelta(hours=12),
                              start + timedelta(hours=12, minutes=59))
    summary_after = Time_Series.Time_Series("")
    summary_after.add_channels(
        counts.summary_statistics(statistics=[("generic", ["sum", "n", "missing"])]))

    # Should now be an hour of 1s
    assert summary_after.get_channel("AG_Counts_sum").data[0] == 60
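
# A hedged sketch of the fixture these tests assume: a module-level `counts`
# Channel holding 72 hours of 1-minute epochs (24h of 1s, 15h of 0s, 9h of 1s,
# 24h of 1s). The builder is illustrative, not pampro's actual test harness;
# it assumes numpy/datetime imports and that Channel.set_contents takes
# (data, timestamps) arrays.
def _make_counts_fixture():
    epoch_start = datetime(2000, 1, 1)
    values = np.array([1] * (24 * 60) + [0] * (15 * 60) + [1] * (9 * 60) + [1] * (24 * 60))
    timestamps = np.array([epoch_start + timedelta(minutes=i) for i in range(len(values))])
    counts = Channel.Channel("AG_Counts")
    counts.set_contents(values, timestamps)
    return counts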
Example #2
    def build_statistics_channels(self, windows, statistics, name=""):

        # Create an empty output channel for each variable the statistics will yield
        channel_list = []
        for stat in statistics:
            for channel_name in design_variable_names(self.name, stat):
                channel_list.append(Channel(channel_name))

        num_expected_results = len(channel_list)

        for window in windows:

            results = self.window_statistics(window.start_timestamp, window.end_timestamp, statistics)
            if len(results) != num_expected_results:
                raise Exception("Incorrect number of statistics yielded. {} expected, {} given. Channel: {}. Statistics: {}.".format(num_expected_results, len(results), self.name, statistics))

            # Each window contributes one data point per output channel,
            # timestamped at the window start
            for channel, result in zip(channel_list, results):
                channel.append_data(window.start_timestamp, result)

        for channel in channel_list:
            channel.calculate_timeframe()
            channel.data = np.array(channel.data)
            channel.timestamps = np.array(channel.timestamps)

        ts = Time_Series(name)
        ts.add_channels(channel_list)
        return ts
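
# A hedged usage sketch (assumes `from pampro import Bout` and a populated
# pampro Channel with at least 24 hours of data): summarise a channel hour by
# hour by passing explicit Bout windows and a statistics spec.
def hourly_summary(channel):
    start = channel.timestamps[0]
    windows = [Bout.Bout(start + timedelta(hours=h), start + timedelta(hours=h + 1))
               for h in range(24)]
    return channel.build_statistics_channels(windows, [("generic", ["mean", "sum"])], name="hourly")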
Example #3
def test_a():
    # Case A
    # Both timestamps precede the data

    origin = counts.timestamps[0]

    start = origin - timedelta(days=2)
    end = origin - timedelta(days=1)

    # Summarise the data before deletion
    summary_before = Time_Series.Time_Series("")
    summary_before.add_channels(
        counts.summary_statistics(statistics=[
            ("generic", ["sum", "n", "missing"]),
            ("cutpoints", [[0, 0], [0, 1], [1, 1]])]))

    counts.delete_windows([Bout.Bout(start, end)])

    # Summarise the data after deletion
    summary_after = Time_Series.Time_Series("")
    summary_after.add_channels(
        counts.summary_statistics(statistics=[
            ("generic", ["sum", "n", "missing"]),
            ("cutpoints", [[0, 0], [0, 1], [1, 1]])]))

    # All values should be identical, loop through them and assert equality
    suffixes = "sum n missing 0_0 0_1 1_1".split(" ")

    for suffix in suffixes:
        assert (summary_before.get_channel("AG_Counts_" + suffix).data[0] ==
                summary_after.get_channel("AG_Counts_" + suffix).data[0])
Example #4
def test_nonwear_amount():

    # File contains 24 hours of 1s, then 15 hours of 0s, then 9 hours of 1s, then 24 hours of 1s

    nonwear_bouts, wear_bouts = channel_inference.infer_nonwear_actigraph(counts)

    # There is 1 nonwear bout and 2 wear bouts surrounding it
    assert(len(nonwear_bouts) == 1)
    assert(len(wear_bouts) == 2)

    Bout.cache_lengths(nonwear_bouts)
    Bout.cache_lengths(wear_bouts)

    nw_bout = nonwear_bouts[0]

    # The nonwear bout is 15 hours long
    assert(nw_bout.length == timedelta(hours=15))

    # Summarise the data before deleting the nonwear
    summary_before = Time_Series.Time_Series("")
    summary_before.add_channels(counts.summary_statistics(statistics=[
        ("generic", ["sum", "n", "missing"]),
        ("cutpoints", [[0, 0], [0, 1], [1, 1]])]))

    # Number of 1s = 24 hours then 9 hours then 24 hours
    assert(summary_before.get_channel("AG_Counts_sum").data[0] == (24+9+24)*60)

    # 15 hours of 0s
    assert(summary_before.get_channel("AG_Counts_0_0").data[0] == 15*60)

    # Sum should = number of 1s
    assert(summary_before.get_channel("AG_Counts_1_1").data[0] == (24+9+24)*60)

    # n should be 3 days = 1440*3 = 24*3*60
    assert(summary_before.get_channel("AG_Counts_n").data[0] == 24*3*60)

    # Missing should be 0
    assert(summary_before.get_channel("AG_Counts_missing").data[0] == 0)

    counts.delete_windows(nonwear_bouts)

    # Summarise the data after deleting the nonwear
    summary_after = Time_Series.Time_Series("")
    summary_after.add_channels(counts.summary_statistics(statistics=[
        ("generic", ["sum", "n", "missing"]),
        ("cutpoints", [[0, 0], [0, 1], [1, 1]])]))

    # Sum shouldn't have changed
    assert(summary_after.get_channel("AG_Counts_sum").data[0] == (24+9+24)*60)

    # All the 0s were nonwear, so there should now be no 0s
    assert(summary_after.get_channel("AG_Counts_0_0").data[0] == 0)

    # And the number of 1s shouldn't have changed
    assert(summary_after.get_channel("AG_Counts_1_1").data[0] == (24+9+24)*60)

    # n should have reduced by 15 hours = 15*60
    assert(summary_after.get_channel("AG_Counts_n").data[0] == (24+9+24)*60)

    # missing should have gone up by 15 hours = 15*60
    assert(summary_after.get_channel("AG_Counts_missing").data[0] == 15*60)
Example #5
def test_f():
    # Case F
    # Multiple deletions producing consistent results

    origin = counts.timestamps[0]

    # Delete first 2 hours
    start = origin
    end = origin + timedelta(hours=2)

    # Summarise the data before deletion
    summary_before = Time_Series.Time_Series("")
    summary_before.add_channels(
        counts.summary_statistics(statistics=[
            ("generic", ["sum", "n", "missing"]),
            ("cutpoints", [[0, 0], [0, 1], [1, 1]])]))

    counts.delete_windows([Bout.Bout(start, end)])

    # Summarise the data after deletion
    summary_after_a = Time_Series.Time_Series("")
    summary_after_a.add_channels(
        counts.summary_statistics(statistics=[
            ("generic", ["sum", "n", "missing"]),
            ("cutpoints", [[0, 0], [0, 1], [1, 1]])]))

    # Delete midday to 2pm
    start = origin + timedelta(hours=12)
    end = origin + timedelta(hours=14)

    counts.delete_windows([Bout.Bout(start, end)])

    # Summarise the data after deletion
    summary_after_b = Time_Series.Time_Series("")
    summary_after_b.add_channels(
        counts.summary_statistics(statistics=[
            ("generic", ["sum", "n", "missing"]),
            ("cutpoints", [[0, 0], [0, 1], [1, 1]])]))

    # 20 hours left
    assert (summary_after_b.get_channel("AG_Counts_n").data[0] == 20 * 60)

    # 4 hours missing
    assert (summary_after_b.get_channel("AG_Counts_missing").data[0] == 4 * 60)

    # Sum data should be 20 1s
    assert (summary_after_b.get_channel("AG_Counts_sum").data[0] == 20 * 60)
Example #6
def test_b():
    # Case B
    # First timestamp precedes the data, the second doesn't

    origin = counts.timestamps[0]

    start = origin - timedelta(hours=12)
    end = origin + timedelta(hours=12)

    # Summarise the data before deletion
    summary_before = Time_Series.Time_Series("")
    summary_before.add_channels(
        counts.summary_statistics(statistics=[
            ("generic", ["sum", "n", "missing"]),
            ("cutpoints", [[0, 0], [0, 1], [1, 1]])]))

    counts.delete_windows([Bout.Bout(start, end)])

    # Summarise the data after deletion
    summary_after = Time_Series.Time_Series("")
    summary_after.add_channels(
        counts.summary_statistics(statistics=[
            ("generic", ["sum", "n", "missing"]),
            ("cutpoints", [[0, 0], [0, 1], [1, 1]])]))

    # n should go down and missing should go up
    assert (summary_before.get_channel("AG_Counts_n").data[0] >
            summary_after.get_channel("AG_Counts_n").data[0])
    assert (summary_before.get_channel("AG_Counts_missing").data[0] <
            summary_after.get_channel("AG_Counts_missing").data[0])

    # Should only be 12 hours left
    assert (summary_after.get_channel("AG_Counts_n").data[0] == 12 * 60)

    # And 12 hours missing
    assert (summary_after.get_channel("AG_Counts_missing").data[0] == 12 * 60)
Example #7
def test_c():
    # Case C
    # Both timestamps inside data
    origin = counts.timestamps[0]

    start = origin + timedelta(hours=6)
    end = origin + timedelta(hours=7)

    # Summarise the data before deletion
    summary_before = Time_Series.Time_Series("")
    summary_before.add_channels(
        counts.summary_statistics(statistics=[
            ("generic", ["sum", "n", "missing"]),
            ("cutpoints", [[0, 0], [0, 1], [1, 1]])]))

    counts.delete_windows([Bout.Bout(start, end)])

    # Summarise the data after deletion
    summary_after = Time_Series.Time_Series("")
    summary_after.add_channels(
        counts.summary_statistics(statistics=[
            ("generic", ["sum", "n", "missing"]),
            ("cutpoints", [[0, 0], [0, 1], [1, 1]])]))

    # n should go down and missing should go up
    assert (summary_before.get_channel("AG_Counts_n").data[0] >
            summary_after.get_channel("AG_Counts_n").data[0])
    assert (summary_before.get_channel("AG_Counts_missing").data[0] <
            summary_after.get_channel("AG_Counts_missing").data[0])

    # Should only be 23 hours left
    assert (summary_after.get_channel("AG_Counts_n").data[0] == 23 * 60)

    # And 1 hour missing
    assert (summary_after.get_channel("AG_Counts_missing").data[0] == 1 * 60)
Example #8

# Assumed imports for this snippet (pampro module layout as in Example #11)
from pampro import Time_Series, Channel, channel_inference, triaxial_calibration

# Load sample activPAL data
x, y, z = Channel.load_channels(
    "/pa/data/STVS/_data/activpal_data/714952C-AP1335893 18Nov13 10-00am for 7d 23h 14m.datx",
    "activPAL")

# Autocalibrate the raw acceleration data
x, y, z, cal_params, results, misc = triaxial_calibration.calibrate(x, y, z)

# Infer some sample level info from the three channels - VM, ENMO, Pitch & Roll
vm = channel_inference.infer_vector_magnitude(x, y, z)
enmo = channel_inference.infer_enmo(vm)
pitch, roll = channel_inference.infer_pitch_roll(x, y, z)

# Create a time series object and add all signals to it
ts = Time_Series.Time_Series("activPAL")
ts.add_channels([x, y, z, vm, enmo, pitch, roll])

# Request some stats about the time series
# In this case: mean ENMO, pitch and roll, and 10 degree cutpoints of pitch and roll
angle_levels = [[-90, -80], [-80, -70], [-70, -60], [-60, -50], [-50, -40],
                [-40, -30], [-30, -20], [-20, -10], [-10, 0], [0, 10],
                [10, 20], [20, 30], [30, 40], [40, 50], [50, 60], [60, 70],
                [70, 80], [80, 90]]
stat_dict = {
    "Pitch": angle_levels + ["mean"],
    "Roll": angle_levels + ["mean"],
    "ENMO": ["mean"]
}

# Get the output at 15 minute level
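# Hedged completion (assumed pampro API; the exact signature may differ):
# piecewise_statistics is the windowed counterpart of summary_statistics.
# quarter_hourly_ts = ts.piecewise_statistics(timedelta(minutes=15), statistics=stat_dict)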
Example #9
def qc_analysis(job_details):

    id_num = str(job_details["pid"])
    filename = job_details["filename"]

    filename_short = os.path.basename(filename).split('.')[0]

    # filetype and the battery maxima are assumed to be module-level settings
    battery_max = 0
    if filetype == "GeneActiv":
        battery_max = GA_battery_max
    elif filetype == "Axivity":
        battery_max = AX_battery_max

    # Load the data from the hdf5 file
    ts, header = data_loading.fast_load(filename, filetype)

    header["QC_filename"] = os.path.basename(filename)

    x, y, z, battery, temperature = ts.get_channels(["X", "Y", "Z", "Battery", "Temperature"])
    
    # create a channel of battery percentage, based on the assumed battery maximum value 
    battery_pct = Channel.Channel.clone(battery)
    battery_pct.data = (battery.data / battery_max) * 100
    
    channels = [x, y, z, battery, temperature, battery_pct]
    
    anomalies = diagnostics.diagnose_fix_anomalies(channels, discrepancy_threshold=2)

    # create a dictionary of anomaly type counts
    anomalies_dict = dict()
                        
    # check whether any anomalies have been found:
    if len(anomalies) > 0:
        anomalies_file = os.path.join(anomalies_folder, "{}_anomalies.csv".format(filename_short))
        df = pd.DataFrame(anomalies)
        
        for anomaly_type in anomaly_types:
            anomalies_dict["QC_anomaly_{}".format(anomaly_type)] = (df.anomaly_type.values == anomaly_type).sum()
        
        df = df.set_index("anomaly_type")
        # print record of anomalies to anomalies_file
        df.to_csv(anomalies_file)
        
    else:
        for anomaly_type in anomaly_types:
            anomalies_dict["QC_anomaly_{}".format(anomaly_type)] = 0
        
    # check for axis anomalies
    axes_dict = diagnostics.diagnose_axes(x, y, z, noise_cutoff_mg=13)
    
    axis_anomaly = False
    
    for key, val in axes_dict.items():
        anomalies_dict["QC_{}".format(key)] = val
        if key.endswith("max"):
            if val > axis_max:
                axis_anomaly = True
        elif key.endswith("min"):
            if val < axis_min:
                axis_anomaly = True

    # create a "check battery" flag:
    check_battery = False

    # calculate first and last battery percentages
    first_battery_pct = round(battery_pct.data[1], 2)
    last_battery_pct = round(battery_pct.data[-1], 2)
    header["QC_first_battery_pct"] = first_battery_pct
    header["QC_last_battery_pct"] = last_battery_pct
    
    # calculate lowest battery percentage
    # if battery_pct has a missing_value, exclude those values before taking the minimum
    if battery_pct.missing_value == "None":
        lowest_battery_pct = min(battery_pct.data)
    else:
        test_array = np.delete(battery_pct.data, np.where(battery_pct.data == battery_pct.missing_value))
        lowest_battery_pct = min(test_array)
    
    header["QC_lowest_battery_pct"] = round(lowest_battery_pct,2)
    header["QC_lowest_battery_threshold"] = battery_minimum
        
    # find the maximum battery discharge over any rolling period of discharge_hours
    max_discharge = battery_pct.channel_max_decrease(time_period=timedelta(hours=discharge_hours))
    header["QC_max_discharge"] = round(max_discharge, 2)
    header["QC_discharge_time_period"] = "{} hours".format(discharge_hours)
    header["QC_discharge_threshold"] = discharge_pct

    # flag if the lowest battery percentage dips below battery_minimum at any point,
    # OR the maximum discharge over the time period exceeds discharge_pct
    if lowest_battery_pct < battery_minimum or max_discharge > discharge_pct:
        check_battery = True
        
    header["QC_check_battery"] = str(check_battery)
    header["QC_axis_anomaly"] = str(axis_anomaly)

    # Calculate the time frame to use
    start = time_utilities.start_of_day(x.timeframe[0])
    end = time_utilities.end_of_day(x.timeframe[-1])
    tp = (start, end)

    results_ts = Time_Series.Time_Series("")

    # Derive some signal features
    vm = channel_inference.infer_vector_magnitude(x, y, z)
    enmo = channel_inference.infer_enmo(vm)
    enmo.minimum = 0
    enmo.maximum = enmo_max

    # Infer nonwear
    nonwear_bouts = channel_inference.infer_nonwear_for_qc(x, y, z, noise_cutoff_mg=noise_cutoff_mg)
    # Use nonwear bouts to calculate wear bouts
    wear_bouts = Bout.time_period_minus_bouts(enmo.timeframe, nonwear_bouts)

    # Use wear bouts to calculate the amount of wear time in the file in hours, save to meta data
    total_wear = Bout.total_time(wear_bouts)
    total_seconds_wear = total_wear.total_seconds()
    total_hours_wear = round(total_seconds_wear/3600)
    header["QC_total_hours_wear"] = total_hours_wear

    # Split the enmo channel into bout lists for each quadrant of the day:
    # quadrant_0 = 00:00 -> 06:00
    # quadrant_1 = 06:00 -> 12:00
    # quadrant_2 = 12:00 -> 18:00
    # quadrant_3 = 18:00 -> 00:00
    q_0, q_1, q_2, q_3 = channel_inference.create_quadrant_bouts(enmo)

    # calculate the intersection of each set of bouts with wear_bouts, then calculate the wear time in each quadrant.
    sum_quadrant_wear = 0
    for quadrant, name1, name2 in ([q_0, "QC_hours_wear_quadrant_0", "QC_pct_wear_quadrant_0"],
                                   [q_1, "QC_hours_wear_quadrant_1", "QC_pct_wear_quadrant_1"],
                                   [q_2, "QC_hours_wear_quadrant_2", "QC_pct_wear_quadrant_2"],
                                   [q_3, "QC_hours_wear_quadrant_3", "QC_pct_wear_quadrant_3"]):
        quadrant_wear = Bout.bout_list_intersection(quadrant, wear_bouts)
        seconds_wear = Bout.total_time(quadrant_wear).total_seconds()
        hours_wear = round(seconds_wear / 3600)
        header[name1] = hours_wear
        header[name2] = round(((hours_wear / total_hours_wear) * 100), 2)

    for bout in nonwear_bouts:
        # Show non-wear bouts in purple
        bout.draw_properties = {'lw': 0, 'alpha': 0.75, 'facecolor': '#764af9'}

    for channel, channel_name in zip([enmo, battery_pct],["ENMO", "Battery_percentage"]):
        channel.name = channel_name
        results_ts.add_channel(channel)

    if PLOT == "YES":    
        # Plot statistics as subplots in one plot file per data file
        results_ts["ENMO"].add_annotations(nonwear_bouts)
        results_ts.draw_qc(plotting_df, file_target=os.path.join(charts_folder,"{}_plots.png".format(filename_short)))

    header["QC_script"] = version
    
    # file of metadata from qc process
    qc_output = os.path.join(results_folder, "qc_meta_{}.csv".format(filename_short))
    # check if qc_output already exists...
    if os.path.isfile(qc_output):
        os.remove(qc_output)
    
    metadata = {**header, **anomalies_dict}
    
    # write metadata to file
    pampro_utilities.dict_write(qc_output, id_num, metadata)

    # Free the channel data explicitly; in a batch QC run this keeps memory bounded
    for c in ts:
        del c.data
        del c.timestamps
        del c.indices
        del c.cached_indices
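
# Hedged usage sketch: qc_analysis expects a job dict carrying "pid" and
# "filename"; everything else (filetype, thresholds, output folders, PLOT)
# is assumed to be a module-level setting. The path below is illustrative.
# qc_analysis({"pid": 12345, "filename": "/path/to/device_file.h5"})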
Example #10

# Assumed imports for this snippet (pampro module layout as in Example #11)
from datetime import timedelta
from pampro import Time_Series, Channel, channel_inference

# Request some interesting statistics - mean, min and max of the counts signal
# ...plus basic cutpoints for Sedentary, Light, and Moderate to Vigorous

stats = {
    "AG_Counts": [("generic", ["mean", "min", "max"]),
                  ("cutpoints", [[0, 99], [100, 2999], [3000, 99999]])]
}

# Load Actigraph data
counts, header = Channel.load_channels(
    "/pa/data/Tom/pampro/data/example_actigraph.DAT",
    "Actigraph",
    datetime_format="%m/%d/%Y")

ts = Time_Series.Time_Series("Actigraph")
ts.add_channel(counts)

# Get lists of bouts where the monitor was & wasn't worn
# (infer_nonwear_actigraph returns both nonwear and wear bouts, as in the tests above)
nonwear_bouts, wear_bouts = channel_inference.infer_nonwear_actigraph(
    counts, zero_minutes=timedelta(minutes=90))

# Use that list to get a list of days of valid & invalid time
invalid_bouts = channel_inference.infer_valid_days(counts, wear_bouts)

# Since the cutpoints defined above only count positive data, negative values will be ignored
# Where the monitor wasn't worn, set the count value to -1
# Where the monitor wasn't valid, set the count value to -2
counts.fill_windows(nonwear_bouts, fill_value=-1)
counts.fill_windows(invalid_bouts, fill_value=-2)
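
# Hedged completion (assumed API, as in Example #8): with nonwear and invalid
# time flagged negative, windowed statistics over the cutpoints above would
# count genuine wear time only, e.g.:
# daily_ts = ts.piecewise_statistics(timedelta(days=1), statistics=stats)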
Example #11
import os
import numpy as np
import matplotlib.pyplot as plt
from matplotlib.dates import DayLocator, HourLocator, DateFormatter, drange
from datetime import datetime, date, time, timedelta
from scipy import stats
import random
import copy

from pampro import Time_Series, Channel, channel_inference, Bout

execution_start = datetime.now()

ts = Time_Series.Time_Series("Actiheart")

# Load sample Actiheart data
filename = os.path.join(os.path.dirname(__file__), '..', 'data', 'ARBOTW.txt')

chans = Channel.load_channels(filename, "Actiheart")
activity, ecg = chans[0], chans[1]

# Calculate moving averages of the channels
ecg_ma = ecg.moving_average(15)
activity_ma = activity.moving_average(15)
ts.add_channel(ecg_ma)
ts.add_channel(activity_ma)

# Smooth the time derivative of the activity signal
activity_derivative = activity.time_derivative()
activity_derivative = activity_derivative.moving_average(121)
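
# Hedged continuation sketch: the smoothed derivative can join the time series
# like any other channel (name illustrative).
# activity_derivative.name = "Activity_MA_derivative"
# ts.add_channel(activity_derivative)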