def test_restrict_timeframe():

    # File contains 24 hours of 1s, then 15 hours of 0s, then 9 hours of 1s, then 24 hours of 1s
    start = counts.timestamps[0]
    end = counts.timestamps[-1]

    # Summarise the data before restricting
    summary_before = Time_Series.Time_Series("")
    summary_before.add_channels(
        counts.summary_statistics(statistics=[("generic", ["sum", "n", "missing"])]))

    # Number of 1s = 24 hours + 9 hours + 24 hours
    assert summary_before.get_channel("AG_Counts_sum").data[0] == (24 + 9 + 24) * 60

    # Trim an hour off the start
    counts.restrict_timeframe(start + timedelta(hours=1), end)

    summary_after = Time_Series.Time_Series("")
    summary_after.add_channels(
        counts.summary_statistics(statistics=[("generic", ["sum", "n", "missing"])]))

    # Should be 1 hour less
    assert summary_after.get_channel("AG_Counts_sum").data[0] == int((23 + 9 + 24) * 60)

    # Repeating exactly the same restriction should change nothing
    counts.restrict_timeframe(start + timedelta(hours=1), end)

    summary_after = Time_Series.Time_Series("")
    summary_after.add_channels(
        counts.summary_statistics(statistics=[("generic", ["sum", "n", "missing"])]))

    assert summary_after.get_channel("AG_Counts_sum").data[0] == int((23 + 9 + 24) * 60)

    # Now trim an hour off the end
    counts.restrict_timeframe(start + timedelta(hours=1), end - timedelta(hours=1))

    summary_after = Time_Series.Time_Series("")
    summary_after.add_channels(
        counts.summary_statistics(statistics=[("generic", ["sum", "n", "missing"])]))

    # Should now be an hour less again
    assert summary_after.get_channel("AG_Counts_sum").data[0] == int((23 + 9 + 23) * 60)

    # Now trim to a single hour
    counts.restrict_timeframe(start + timedelta(hours=12),
                              start + timedelta(hours=12, minutes=59))

    summary_after = Time_Series.Time_Series("")
    summary_after.add_channels(
        counts.summary_statistics(statistics=[("generic", ["sum", "n", "missing"])]))

    # Should now be a single hour of 1s
    assert summary_after.get_channel("AG_Counts_sum").data[0] == 60
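# A minimal helper sketch (not in the original tests) wrapping the
# summarise-into-a-Time_Series pattern repeated throughout these tests;
# "summarise" is a hypothetical name, and the call assumes the same
# Channel.summary_statistics API the tests already use.
def summarise(channel, statistics=None):
    if statistics is None:
        statistics = [("generic", ["sum", "n", "missing"])]
    summary = Time_Series.Time_Series("")
    summary.add_channels(channel.summary_statistics(statistics=statistics))
    return summary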
def build_statistics_channels(self, windows, statistics, name=""):

    channel_list = []

    # Create one empty output Channel per expected statistic
    for stat in statistics:
        channel_names = design_variable_names(self.name, stat)
        for cn in channel_names:
            channel_list.append(Channel(cn))

    num_expected_results = len(channel_list)

    # Calculate the statistics for each window and append the results to the output channels
    for window in windows:
        results = self.window_statistics(window.start_timestamp, window.end_timestamp, statistics)
        if len(results) != num_expected_results:
            raise Exception("Incorrect number of statistics yielded. {} expected, {} given. Channel: {}. Statistics: {}.".format(num_expected_results, len(results), self.name, statistics))

        for i, result in enumerate(results):
            channel_list[i].append_data(window.start_timestamp, result)

    # Finalise each output channel
    for channel in channel_list:
        channel.calculate_timeframe()
        channel.data = np.array(channel.data)
        channel.timestamps = np.array(channel.timestamps)

    ts = Time_Series(name)
    ts.add_channels(channel_list)
    return ts
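# A hedged usage sketch for build_statistics_channels: the hourly Bout windows
# and the "counts" channel are illustrative, reusing the statistics spec and
# Bout API seen elsewhere in this repository.
start = counts.timestamps[0]
hourly_windows = [Bout.Bout(start + timedelta(hours=h), start + timedelta(hours=h + 1))
                  for h in range(24)]
hourly_ts = counts.build_statistics_channels(hourly_windows,
                                             [("generic", ["sum", "n", "missing"])],
                                             name="hourly")
# hourly_ts now holds one Channel per statistic (e.g. "AG_Counts_sum"),
# with one value per window, timestamped at each window's start.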
def test_a():

    # Case A
    # Both timestamps precede the data
    origin = counts.timestamps[0]
    start = origin - timedelta(days=2)
    end = origin - timedelta(days=1)

    # Summarise the data before deletion
    summary_before = Time_Series.Time_Series("")
    summary_before.add_channels(
        counts.summary_statistics(statistics=[
            ("generic", ["sum", "n", "missing"]),
            ("cutpoints", [[0, 0], [0, 1], [1, 1]])]))

    counts.delete_windows([Bout.Bout(start, end)])

    # Summarise the data after deletion
    summary_after = Time_Series.Time_Series("")
    summary_after.add_channels(
        counts.summary_statistics(statistics=[
            ("generic", ["sum", "n", "missing"]),
            ("cutpoints", [[0, 0], [0, 1], [1, 1]])]))

    # All values should be identical; loop through them and assert equality
    suffixes = "sum n missing 0_0 0_1 1_1".split(" ")
    for suffix in suffixes:
        assert (summary_before.get_channel("AG_Counts_" + suffix).data[0] ==
                summary_after.get_channel("AG_Counts_" + suffix).data[0])
def test_nonwear_amount():

    # File contains 24 hours of 1s, then 15 hours of 0s, then 9 hours of 1s, then 24 hours of 1s
    nonwear_bouts, wear_bouts = channel_inference.infer_nonwear_actigraph(counts)

    # There is 1 nonwear bout and 2 wear bouts surrounding it
    assert len(nonwear_bouts) == 1
    assert len(wear_bouts) == 2

    Bout.cache_lengths(nonwear_bouts)
    Bout.cache_lengths(wear_bouts)

    nw_bout = nonwear_bouts[0]

    # The nonwear bout is 15 hours long
    assert nw_bout.length == timedelta(hours=15)

    # Summarise the data before deleting the nonwear
    summary_before = Time_Series.Time_Series("")
    summary_before.add_channels(
        counts.summary_statistics(statistics=[
            ("generic", ["sum", "n", "missing"]),
            ("cutpoints", [[0, 0], [0, 1], [1, 1]])]))

    # Number of 1s = 24 hours + 9 hours + 24 hours
    assert summary_before.get_channel("AG_Counts_sum").data[0] == (24 + 9 + 24) * 60

    # 15 hours of 0s
    assert summary_before.get_channel("AG_Counts_0_0").data[0] == 15 * 60

    # Number of 1s should equal the sum
    assert summary_before.get_channel("AG_Counts_1_1").data[0] == (24 + 9 + 24) * 60

    # n should be 3 days = 24*3*60 minutes
    assert summary_before.get_channel("AG_Counts_n").data[0] == 24 * 3 * 60

    # Missing should be 0
    assert summary_before.get_channel("AG_Counts_missing").data[0] == 0

    counts.delete_windows(nonwear_bouts)

    # Summarise the data after deleting the nonwear
    summary_after = Time_Series.Time_Series("")
    summary_after.add_channels(
        counts.summary_statistics(statistics=[
            ("generic", ["sum", "n", "missing"]),
            ("cutpoints", [[0, 0], [0, 1], [1, 1]])]))

    # Sum shouldn't have changed
    assert summary_after.get_channel("AG_Counts_sum").data[0] == (24 + 9 + 24) * 60

    # All the 0s were nonwear, so there should now be no 0s
    assert summary_after.get_channel("AG_Counts_0_0").data[0] == 0

    # And the number of 1s shouldn't have changed
    assert summary_after.get_channel("AG_Counts_1_1").data[0] == (24 + 9 + 24) * 60

    # n should have reduced by 15 hours, leaving (24+9+24) hours
    assert summary_after.get_channel("AG_Counts_n").data[0] == (24 + 9 + 24) * 60

    # Missing should have gone up by 15 hours = 15*60 minutes
    assert summary_after.get_channel("AG_Counts_missing").data[0] == 15 * 60
def test_f():

    # Case F
    # Multiple deletions producing consistent results
    origin = counts.timestamps[0]

    # Delete the first 2 hours
    start = origin
    end = origin + timedelta(hours=2)

    # Summarise the data before deletion
    summary_before = Time_Series.Time_Series("")
    summary_before.add_channels(
        counts.summary_statistics(statistics=[
            ("generic", ["sum", "n", "missing"]),
            ("cutpoints", [[0, 0], [0, 1], [1, 1]])]))

    counts.delete_windows([Bout.Bout(start, end)])

    # Summarise the data after the first deletion
    summary_after_a = Time_Series.Time_Series("")
    summary_after_a.add_channels(
        counts.summary_statistics(statistics=[
            ("generic", ["sum", "n", "missing"]),
            ("cutpoints", [[0, 0], [0, 1], [1, 1]])]))

    # Delete midday to 2pm
    start = origin + timedelta(hours=12)
    end = origin + timedelta(hours=14)

    counts.delete_windows([Bout.Bout(start, end)])

    # Summarise the data after the second deletion
    summary_after_b = Time_Series.Time_Series("")
    summary_after_b.add_channels(
        counts.summary_statistics(statistics=[
            ("generic", ["sum", "n", "missing"]),
            ("cutpoints", [[0, 0], [0, 1], [1, 1]])]))

    # 20 hours left
    assert summary_after_b.get_channel("AG_Counts_n").data[0] == 20 * 60

    # 4 hours missing
    assert summary_after_b.get_channel("AG_Counts_missing").data[0] == 4 * 60

    # Sum should be 20 hours of 1s
    assert summary_after_b.get_channel("AG_Counts_sum").data[0] == 20 * 60
def test_b():

    # Case B
    # First timestamp precedes the data, second doesn't
    origin = counts.timestamps[0]
    start = origin - timedelta(hours=12)
    end = origin + timedelta(hours=12)

    # Summarise the data before deletion
    summary_before = Time_Series.Time_Series("")
    summary_before.add_channels(
        counts.summary_statistics(statistics=[
            ("generic", ["sum", "n", "missing"]),
            ("cutpoints", [[0, 0], [0, 1], [1, 1]])]))

    counts.delete_windows([Bout.Bout(start, end)])

    # Summarise the data after deletion
    summary_after = Time_Series.Time_Series("")
    summary_after.add_channels(
        counts.summary_statistics(statistics=[
            ("generic", ["sum", "n", "missing"]),
            ("cutpoints", [[0, 0], [0, 1], [1, 1]])]))

    # n should go down and missing should go up
    assert (summary_before.get_channel("AG_Counts_n").data[0] >
            summary_after.get_channel("AG_Counts_n").data[0])
    assert (summary_before.get_channel("AG_Counts_missing").data[0] <
            summary_after.get_channel("AG_Counts_missing").data[0])

    # Should only be 12 hours left
    assert summary_after.get_channel("AG_Counts_n").data[0] == 12 * 60

    # And 12 hours missing
    assert summary_after.get_channel("AG_Counts_missing").data[0] == 12 * 60
def test_c():

    # Case C
    # Both timestamps inside the data
    origin = counts.timestamps[0]
    start = origin + timedelta(hours=6)
    end = origin + timedelta(hours=7)

    # Summarise the data before deletion
    summary_before = Time_Series.Time_Series("")
    summary_before.add_channels(
        counts.summary_statistics(statistics=[
            ("generic", ["sum", "n", "missing"]),
            ("cutpoints", [[0, 0], [0, 1], [1, 1]])]))

    counts.delete_windows([Bout.Bout(start, end)])

    # Summarise the data after deletion
    summary_after = Time_Series.Time_Series("")
    summary_after.add_channels(
        counts.summary_statistics(statistics=[
            ("generic", ["sum", "n", "missing"]),
            ("cutpoints", [[0, 0], [0, 1], [1, 1]])]))

    # n should go down and missing should go up
    assert (summary_before.get_channel("AG_Counts_n").data[0] >
            summary_after.get_channel("AG_Counts_n").data[0])
    assert (summary_before.get_channel("AG_Counts_missing").data[0] <
            summary_after.get_channel("AG_Counts_missing").data[0])

    # Should only be 23 hours left
    assert summary_after.get_channel("AG_Counts_n").data[0] == 23 * 60

    # And 1 hour missing
    assert summary_after.get_channel("AG_Counts_missing").data[0] == 1 * 60
# Load sample activPAL data
x, y, z = Channel.load_channels(
    "/pa/data/STVS/_data/activpal_data/714952C-AP1335893 18Nov13 10-00am for 7d 23h 14m.datx",
    "activPAL")

# Autocalibrate the raw acceleration data
x, y, z, cal_params, results, misc = triaxial_calibration.calibrate(x, y, z)

# Infer some sample-level info from the three channels - VM, ENMO, pitch & roll
vm = channel_inference.infer_vector_magnitude(x, y, z)
enmo = channel_inference.infer_enmo(vm)
pitch, roll = channel_inference.infer_pitch_roll(x, y, z)

# Create a time series object and add all signals to it
ts = Time_Series.Time_Series("activPAL")
ts.add_channels([x, y, z, vm, enmo, pitch, roll])

# Request some stats about the time series
# In this case: mean ENMO, pitch and roll, and 10 degree cutpoints of pitch and roll angle
angle_levels = [[-90, -80], [-80, -70], [-70, -60], [-60, -50], [-50, -40],
                [-40, -30], [-30, -20], [-20, -10], [-10, 0], [0, 10],
                [10, 20], [20, 30], [30, 40], [40, 50], [50, 60],
                [60, 70], [70, 80], [80, 90]]

stat_dict = {
    "Pitch": angle_levels + ["mean"],
    "Roll": angle_levels + ["mean"],
    "ENMO": ["mean"]
}

# Get the output at 15 minute level
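# A hedged sketch of the 15-minute aggregation the comment above calls for. It
# assumes ts.piecewise_statistics accepts a timedelta window plus the statistics
# dictionary and returns a Time_Series, and that the result can be written out
# with write_channels_to_file; the output filename is illustrative.
quarter_hourly = ts.piecewise_statistics(timedelta(minutes=15), statistics=stat_dict)
quarter_hourly.write_channels_to_file(file_target="activpal_15m_stats.csv")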
def qc_analysis(job_details):

    id_num = str(job_details["pid"])
    filename = job_details["filename"]

    filename_short = os.path.basename(filename).split('.')[0]

    # (filetype and the various thresholds used below are module-level settings)
    battery_max = 0
    if filetype == "GeneActiv":
        battery_max = GA_battery_max
    elif filetype == "Axivity":
        battery_max = AX_battery_max

    # Load the data from the hdf5 file
    ts, header = data_loading.fast_load(filename, filetype)

    header["QC_filename"] = os.path.basename(filename)

    x, y, z, battery, temperature = ts.get_channels(["X", "Y", "Z", "Battery", "Temperature"])

    # Create a channel of battery percentage, based on the assumed battery maximum value
    battery_pct = Channel.Channel.clone(battery)
    battery_pct.data = (battery.data / battery_max) * 100

    channels = [x, y, z, battery, temperature, battery_pct]

    anomalies = diagnostics.diagnose_fix_anomalies(channels, discrepancy_threshold=2)

    # Create a dictionary of anomaly types
    anomalies_dict = dict()

    # Check whether any anomalies have been found
    if len(anomalies) > 0:
        anomalies_file = os.path.join(anomalies_folder, "{}_anomalies.csv".format(filename_short))
        df = pd.DataFrame(anomalies)

        for anomaly_type in anomaly_types:
            anomalies_dict["QC_anomaly_{}".format(anomaly_type)] = (df.anomaly_type.values == anomaly_type).sum()

        df = df.set_index("anomaly_type")

        # Write a record of the anomalies to anomalies_file
        df.to_csv(anomalies_file)
    else:
        for anomaly_type in anomaly_types:
            anomalies_dict["QC_anomaly_{}".format(anomaly_type)] = 0

    # Check for axis anomalies
    axes_dict = diagnostics.diagnose_axes(x, y, z, noise_cutoff_mg=13)

    axis_anomaly = False

    for key, val in axes_dict.items():
        anomalies_dict["QC_{}".format(key)] = val
        if key.endswith("max"):
            if val > axis_max:
                axis_anomaly = True
        elif key.endswith("min"):
            if val < axis_min:
                axis_anomaly = True

    # Create a "check battery" flag
    check_battery = False

    # Calculate the first and last battery percentages
    first_battery_pct = round(battery_pct.data[0], 2)
    last_battery_pct = round(battery_pct.data[-1], 2)
    header["QC_first_battery_pct"] = first_battery_pct
    header["QC_last_battery_pct"] = last_battery_pct

    # Calculate the lowest battery percentage;
    # if battery_pct has a missing_value, exclude those values
    if battery_pct.missing_value == "None":
        lowest_battery_pct = min(battery_pct.data)
    else:
        test_array = np.delete(battery_pct.data, np.where(battery_pct.data == battery_pct.missing_value))
        lowest_battery_pct = min(test_array)

    header["QC_lowest_battery_pct"] = round(lowest_battery_pct, 2)
    header["QC_lowest_battery_threshold"] = battery_minimum

    # Find the maximum battery discharge in any 24-hour period
    max_discharge = battery_pct.channel_max_decrease(time_period=timedelta(hours=discharge_hours))
    header["QC_max_discharge"] = round(max_discharge, 2)
    header["QC_discharge_time_period"] = "{} hours".format(discharge_hours)
    header["QC_discharge_threshold"] = discharge_pct

    # Flag the battery if the lowest percentage dips below battery_minimum at any point,
    # OR the maximum discharge over discharge_hours exceeds discharge_pct
    if lowest_battery_pct < battery_minimum or max_discharge > discharge_pct:
        check_battery = True

    header["QC_check_battery"] = str(check_battery)
    header["QC_axis_anomaly"] = str(axis_anomaly)

    # Calculate the time frame to use
    start = time_utilities.start_of_day(x.timeframe[0])
    end = time_utilities.end_of_day(x.timeframe[-1])
    tp = (start, end)

    results_ts = Time_Series.Time_Series("")

    # Derive some signal features
    vm = channel_inference.infer_vector_magnitude(x, y, z)
    enmo = channel_inference.infer_enmo(vm)
    enmo.minimum = 0
    enmo.maximum = enmo_max

    # Infer nonwear
    nonwear_bouts = channel_inference.infer_nonwear_for_qc(x, y, z, noise_cutoff_mg=noise_cutoff_mg)

    # Use the nonwear bouts to calculate wear bouts
    wear_bouts = Bout.time_period_minus_bouts(enmo.timeframe, nonwear_bouts)

    # Use the wear bouts to calculate the amount of wear time in the file in hours, save to metadata
    total_wear = Bout.total_time(wear_bouts)
    total_seconds_wear = total_wear.total_seconds()
    total_hours_wear = round(total_seconds_wear / 3600)
    header["QC_total_hours_wear"] = total_hours_wear

    # Split the enmo channel into lists of bouts for each quadrant:
    # quadrant_0 = 00:00 -> 06:00
    # quadrant_1 = 06:00 -> 12:00
    # quadrant_2 = 12:00 -> 18:00
    # quadrant_3 = 18:00 -> 00:00
    q_0, q_1, q_2, q_3 = channel_inference.create_quadrant_bouts(enmo)

    # Calculate the intersection of each set of quadrant bouts with wear_bouts,
    # then calculate the wear time in each quadrant
    for quadrant, name1, name2 in ([q_0, "QC_hours_wear_quadrant_0", "QC_pct_wear_quadrant_0"],
                                   [q_1, "QC_hours_wear_quadrant_1", "QC_pct_wear_quadrant_1"],
                                   [q_2, "QC_hours_wear_quadrant_2", "QC_pct_wear_quadrant_2"],
                                   [q_3, "QC_hours_wear_quadrant_3", "QC_pct_wear_quadrant_3"]):
        quadrant_wear = Bout.bout_list_intersection(quadrant, wear_bouts)
        seconds_wear = Bout.total_time(quadrant_wear).total_seconds()
        hours_wear = round(seconds_wear / 3600)
        header[name1] = hours_wear
        header[name2] = round((hours_wear / total_hours_wear) * 100, 2)

    # Show non-wear bouts in purple
    for bout in nonwear_bouts:
        bout.draw_properties = {'lw': 0, 'alpha': 0.75, 'facecolor': '#764af9'}

    for channel, channel_name in zip([enmo, battery_pct], ["ENMO", "Battery_percentage"]):
        channel.name = channel_name
        results_ts.add_channel(channel)

    if PLOT == "YES":
        # Plot statistics as subplots in one plot file per data file
        results_ts["ENMO"].add_annotations(nonwear_bouts)
        results_ts.draw_qc(plotting_df, file_target=os.path.join(charts_folder, "{}_plots.png".format(filename_short)))

    header["QC_script"] = version

    # File of metadata from the QC process
    qc_output = os.path.join(results_folder, "qc_meta_{}.csv".format(filename_short))

    # Remove any existing qc_output file
    if os.path.isfile(qc_output):
        os.remove(qc_output)

    metadata = {**header, **anomalies_dict}

    # Write the metadata to file
    pampro_utilities.dict_write(qc_output, id_num, metadata)

    # Free the channel data
    for c in ts:
        del c.data
        del c.timestamps
        del c.indices
        del c.cached_indices
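# A hedged sketch of driving qc_analysis: the job_details keys match those read
# at the top of the function, but this driver loop, the pid, and the file path
# are illustrative rather than from the original script.
if __name__ == "__main__":
    jobs = [{"pid": 714952, "filename": "/path/to/data/714952.hdf5"}]
    for job in jobs:
        qc_analysis(job)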
# Request some interesting statistics - mean, min and max of the counts signal
# ...plus basic cutpoints for Sedentary, Light, and Moderate to Vigorous
stats = {
    "AG_Counts": [("generic", ["mean", "min", "max"]),
                  ("cutpoints", [[0, 99], [100, 2999], [3000, 99999]])]
}

# Load Actigraph data
counts, header = Channel.load_channels(
    "/pa/data/Tom/pampro/data/example_actigraph.DAT",
    "Actigraph", datetime_format="%m/%d/%Y")

ts = Time_Series.Time_Series("Actigraph")
ts.add_channel(counts)

# Get lists of bouts where the monitor was & wasn't worn
nonwear_bouts, wear_bouts = channel_inference.infer_nonwear_actigraph(
    counts, zero_minutes=timedelta(minutes=90))

# Use those lists to get a list of bouts of invalid time
invalid_bouts = channel_inference.infer_valid_days(counts, wear_bouts)

# Since the cutpoints defined above only count positive data, negative values will be ignored
# Where the monitor wasn't worn, set the count value to -1
counts.fill_windows(nonwear_bouts, fill_value=-1)
# Where the data wasn't valid, set the count value to -2
counts.fill_windows(invalid_bouts, fill_value=-2)
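# A hedged sketch of producing the daily summary the "stats" dictionary above is
# set up for, assuming the same piecewise_statistics / write_channels_to_file API
# used in the activPAL example; the one-day window and output name are illustrative.
daily = ts.piecewise_statistics(timedelta(days=1), statistics=stats)
daily.write_channels_to_file(file_target="example_actigraph_daily.csv")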
import os
import random
import copy
from datetime import datetime, date, time, timedelta

import numpy as np
import matplotlib.pyplot as plt
from matplotlib.dates import DayLocator, HourLocator, DateFormatter, drange
from scipy import stats

from pampro import Time_Series, Channel, channel_inference, Bout

execution_start = datetime.now()

ts = Time_Series.Time_Series("Actiheart")

# Load sample Actiheart data
filename = os.path.join(os.path.dirname(__file__), '..', 'data', 'ARBOTW.txt')
chans = Channel.load_channels(filename, "Actiheart")
activity = chans[0]
ecg = chans[1]

# Calculate moving averages of the channels
ecg_ma = ecg.moving_average(15)
activity_ma = activity.moving_average(15)
ts.add_channel(ecg_ma)
ts.add_channel(activity_ma)

# Smooth the time derivative of the activity signal
activity_derivative = activity.time_derivative()
activity_derivative = activity_derivative.moving_average(121)
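# A hedged sketch of inspecting the smoothed channels with the matplotlib imports
# above; Channel objects are assumed to expose .timestamps and .data arrays, as
# the tests elsewhere in this repository do, and the output filename is illustrative.
fig, (ax1, ax2) = plt.subplots(2, 1, sharex=True)
ax1.plot(ecg_ma.timestamps, ecg_ma.data, label="ECG (15-sample MA)")
ax2.plot(activity_derivative.timestamps, activity_derivative.data,
         label="Activity derivative (smoothed)")
ax2.xaxis.set_major_formatter(DateFormatter("%H:%M"))
ax1.legend()
ax2.legend()
plt.savefig("actiheart_overview.png", dpi=150)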