Example #1
def infer_valid_days(channel, wear_bouts, valid_criterion=timedelta(hours=10)):

    # Generate day-long windows
    start = time_utilities.start_of_day(channel.timestamps[0])
    day_windows = []
    while start < channel.timeframe[1]:
        day_windows.append(Bout.Bout(start, start+timedelta(days=1)))
        start += timedelta(days=1)

    valid_windows = []
    invalid_windows = []
    for window in day_windows:
        # How much of wear_bouts intersects with this window?
        intersections = Bout.bout_list_intersection([window], wear_bouts)

        total = Bout.total_time(intersections)

        # If the amount of overlap exceeds the valid criterion, it is valid
        if total >= valid_criterion:
            #window.draw_properties={"lw":0, "facecolor":[1,0,0], "alpha":0.25}
            valid_windows.append(window)
        else:
            invalid_windows.append(window)


    return invalid_windows, valid_windows
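
A minimal usage sketch (hedged; assumes pampro-style imports and an Actigraph file, with wear bouts taken from infer_nonwear_actigraph, shown in a later example on this page):

# Illustrative only -- the file path and the 10-hour criterion are assumptions.
from datetime import timedelta
from pampro import data_loading, channel_inference

ts, header = data_loading.load("subject.dat", "Actigraph")
counts = ts.get_channel("AG_Counts")
nonwear_bouts, wear_bouts = channel_inference.infer_nonwear_actigraph(counts)

# Note the return order: invalid windows first, then valid ones
invalid_days, valid_days = infer_valid_days(counts, wear_bouts, valid_criterion=timedelta(hours=10))
print("{} valid days out of {}".format(len(valid_days), len(valid_days) + len(invalid_days)))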
Example #2
def test_bouts():

    one_bouts = counts.bouts(1, 1)

    # There are 7 bouts
    assert len(one_bouts) == 7

    # Their length is 1, 2, 3, .. 7
    assert Bout.total_time(one_bouts) == timedelta(minutes=7 + 6 + 5 + 4 + 3 + 2 + 1)

    # Keeping bouts >= i minutes means there should be 7-(i-1) left
    for i in range(1, 7):
        i_or_longer = Bout.limit_to_lengths(one_bouts, min_length=timedelta(minutes=i))
        assert len(i_or_longer) == 7 - (i - 1)

    # One manual check
    three_or_longer = Bout.limit_to_lengths(one_bouts, min_length=timedelta(minutes=3))
    assert len(three_or_longer) == 5

    # This should exclude the 1 bout at exactly 3 minutes
    three_plus_bit_or_longer = Bout.limit_to_lengths(one_bouts, min_length=timedelta(minutes=3, seconds=1))
    assert len(three_plus_bit_or_longer) == 4

    # No bouts should be this long
    eight_or_longer = Bout.limit_to_lengths(one_bouts, min_length=timedelta(minutes=8))
    assert len(eight_or_longer) == 0

    # There is nothing above 1 in the file, should be 0 bouts
    two_bouts = counts.bouts(2, 989)
    assert len(two_bouts) == 0
Example #3
def test_extracted_bouts():

    one_bouts = counts.bouts(1,1)
    zero_bouts = counts.bouts(0,0)

    # Bouts where counts == 0 and counts == 1 should be mutually exclusive
    # So there should be no intersections between them
    intersections = Bout.bout_list_intersection(one_bouts, zero_bouts)

    assert(len(intersections) == 0)

    # A bout that spans the whole time period should completely intersect with bouts where counts == 1
    one_big_bout = Bout.Bout(counts.timestamps[0]-timedelta(days=1), counts.timestamps[-1]+timedelta(days=1))

    one_intersections = Bout.bout_list_intersection(one_bouts, [one_big_bout])
    assert(Bout.total_time(one_intersections) == Bout.total_time(one_bouts))

    # Same for zeros
    zero_intersections = Bout.bout_list_intersection(zero_bouts, [one_big_bout])
    assert(Bout.total_time(zero_intersections) == Bout.total_time(zero_bouts))

    # Filling in the gaps between the one-bouts should recreate the zero bouts
    inverse_of_one_bouts = Bout.time_period_minus_bouts((counts.timeframe[0], counts.timeframe[1]+timedelta(minutes=1)), one_bouts)

    # They should have the same n
    assert(len(inverse_of_one_bouts) == len(zero_bouts))

    # Same total amount of time
    assert(Bout.total_time(inverse_of_one_bouts) == Bout.total_time(zero_bouts))
Example #4
def test_artificial_bouts():

    start_a = datetime.strptime("01/01/2000", "%d/%m/%Y")
    end_a = start_a + timedelta(hours=1)
    bout_a = Bout.Bout(start_a, end_a)

    # Hour long bout
    assert (bout_a.length == timedelta(hours=1))

    start_b = datetime.strptime("01/01/2000", "%d/%m/%Y")
    end_b = start_b + timedelta(minutes=15)
    bout_b = Bout.Bout(start_b, end_b)

    # They share common time
    assert (bout_a.overlaps(bout_b))

    # 15 minutes, to be precise
    intersection = bout_a.intersection(bout_b)
    assert (intersection.length == timedelta(minutes=15))

    start_c = datetime.strptime("01/02/2000", "%d/%m/%Y")
    end_c = start_c + timedelta(days=1)
    bout_c = Bout.Bout(start_c, end_c)

    # No overlap of those bouts
    assert (not bout_a.overlaps(bout_c))

    # bout_a ends exactly as bout_d starts
    # there should be no overlap (0 common time)
    start_d = end_a
    end_d = start_d + timedelta(minutes=1)
    bout_d = Bout.Bout(start_d, end_d)
    assert (not bout_a.overlaps(bout_d))
Example #5
    def summary_statistics(self, statistics=[("generic", ["mean"])], time_period=False, name=""):

        if time_period is False:
            # No time period given: use a single window guaranteed to cover the whole channel
            windows = [Bout(self.timeframe[0], self.timeframe[1]+timedelta(days=1111))]
        else:
            windows = [Bout(time_period[0], time_period[1])]

        return self.build_statistics_channels(windows, statistics, name=name)
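
A sketch of how this might be called (hedged; `counts` is assumed to be an already-loaded channel, and the statistics spec mirrors the tests elsewhere on this page):

summary_channels = counts.summary_statistics(statistics=[("generic", ["mean", "sum", "n", "missing"])])
for chan in summary_channels:
    print(chan.name, chan.data[0])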
Example #6
def test_nonwear_amount():

    # File contains 24 hours of 1s, then 15 hours of 0s, then 9 hours of 1s, then 24 hours of 1s

    nonwear_bouts, wear_bouts = channel_inference.infer_nonwear_actigraph(counts)

    # There is 1 nonwear bout and 2 wear bouts surrounding it
    assert(len(nonwear_bouts) == 1)
    assert(len(wear_bouts) == 2)

    Bout.cache_lengths(nonwear_bouts)
    Bout.cache_lengths(wear_bouts)

    nw_bout = nonwear_bouts[0]

    # The nonwear bout is 15 hours long
    assert(nw_bout.length == timedelta(hours=15))

    # Summarise the data before deleting the nonwear
    summary_before = Time_Series.Time_Series("")
    summary_before.add_channels(counts.summary_statistics(statistics=[("generic", ["sum", "n", "missing"]),("cutpoints", [[0,0],[0,1],[1,1]])]))

    # Number of 1s = 24 hours then 9 hours then 24 hours
    assert(summary_before.get_channel("AG_Counts_sum").data[0] == (24+9+24)*60)

    # 15 hours of 0s
    assert(summary_before.get_channel("AG_Counts_0_0").data[0] == 15*60)

    # Sum should = number of 1s
    assert(summary_before.get_channel("AG_Counts_1_1").data[0] == (24+9+24)*60)

    # n should be 3 days = 1440*3 = 24*3*60
    assert(summary_before.get_channel("AG_Counts_n").data[0] == 24*3*60)

    # Missing should be 0
    assert(summary_before.get_channel("AG_Counts_missing").data[0] == 0)

    counts.delete_windows(nonwear_bouts)

    # Summarise the data after deleting the nonwear
    summary_after = Time_Series.Time_Series("")
    summary_after.add_channels(counts.summary_statistics(statistics=[("generic", ["sum", "n", "missing"]),("cutpoints", [[0,0],[0,1],[1,1]])]))

    # Sum shouldn't have changed
    assert(summary_after.get_channel("AG_Counts_sum").data[0] == (24+9+24)*60)

    # All the 0s were nonwear, so there should now be no 0s
    assert(summary_after.get_channel("AG_Counts_0_0").data[0] == 0)

    # And the number of 1s shouldn't have changed
    assert(summary_after.get_channel("AG_Counts_1_1").data[0] == (24+9+24)*60)

    # n should have reduced by 15 hours = 15*60
    assert(summary_after.get_channel("AG_Counts_n").data[0] == (24+9+24)*60)

    # missing should have gone up by 15 hours = 15*60
    assert(summary_after.get_channel("AG_Counts_missing").data[0] == 15*60)
Example #7
def test_bouts():

    # There are 8 bouts of 0s
    zero_bouts = counts.bouts(0, 0)
    assert (len(zero_bouts) == 8)

    # There are 8 bouts of 1s
    one_bouts = counts.bouts(1, 1)
    assert (len(one_bouts) == 8)

    # Since there are only 1s and 0s in the file, there should be 1 bout of 0 to 1
    both_bouts = counts.bouts(0, 1)
    assert (len(both_bouts) == 1)

    # The timestamps of that 1 bout should match the start and end of the channel timestamps
    # But "end" of bout occurs 1 minute after end of channel
    assert (both_bouts[0].start_timestamp == counts.timestamps[0])
    assert (both_bouts[0].end_timestamp == counts.timestamps[-1] +
            timedelta(minutes=1))

    # Changing the max value shouldn't change anything
    bouts = counts.bouts(0, 23)
    assert (len(bouts) == 1)

    # Same for the minimum value
    bouts = counts.bouts(-340, 23)
    assert (len(bouts) == 1)

    # Should be no bouts 2 or above
    bouts = counts.bouts(2, 23)
    assert (len(bouts) == 0)

    # Same for below 0
    bouts = counts.bouts(-32323, -2)
    assert (len(bouts) == 0)

    # The data is in 1 minute epochs
    total_zero_time = Bout.total_time(zero_bouts)
    total_one_time = Bout.total_time(one_bouts)
    total_both_time = Bout.total_time(both_bouts)

    assert (total_zero_time == timedelta(minutes=10 * 30))
    assert (total_one_time == timedelta(minutes=16 * 30))
    assert (total_both_time == total_zero_time + total_one_time)

    # Integer seconds spent at 0 should be 300 minutes * 60 = 18000 seconds
    total_zero_time_seconds = total_zero_time.total_seconds()
    assert (total_zero_time_seconds == 10 * 30 * 60)

    # Inverting bouts within a period
    # Since the file is 0s and 1s, the total time - the time spent @ 0 should = time spent @ 1
    not_zero_bouts = Bout.time_period_minus_bouts(
        (counts.timestamps[0], counts.timestamps[-1] + timedelta(minutes=1)),
        zero_bouts)
    total_not_zero_time = Bout.total_time(not_zero_bouts)
    assert (total_not_zero_time == total_one_time)
Example #8
def test_bouts():

    # There are 8 bouts of 0s
    zero_bouts = counts.bouts(0,0)
    assert(len(zero_bouts) == 8)

    # There are 8 bouts of 1s
    one_bouts = counts.bouts(1,1)
    assert(len(one_bouts) == 8)

    # Since there are only 1s and 0s in the file, there should be 1 bout of 0 to 1
    both_bouts = counts.bouts(0,1)
    assert(len(both_bouts) == 1)

    # The timestamps of that 1 bout should match the start and end of the channel timestamps
    # But "end" of bout occurs 1 minute after end of channel
    assert(both_bouts[0].start_timestamp == counts.timestamps[0])
    assert(both_bouts[0].end_timestamp == counts.timestamps[-1]+timedelta(minutes=1))

    # Changing the max value shouldn't change anything
    bouts = counts.bouts(0,23)
    assert(len(bouts) == 1)

    # Same for the minimum value
    bouts = counts.bouts(-340,23)
    assert(len(bouts) == 1)

    # Should be no bouts 2 or above
    bouts = counts.bouts(2,23)
    assert(len(bouts) == 0)

    # Same for below 0
    bouts = counts.bouts(-32323,-2)
    assert(len(bouts) == 0)

    # The data is in 1 minute epochs
    total_zero_time = Bout.total_time(zero_bouts)
    total_one_time = Bout.total_time(one_bouts)
    total_both_time = Bout.total_time(both_bouts)

    assert(total_zero_time == timedelta(minutes=10*30))
    assert(total_one_time == timedelta(minutes=16*30))
    assert(total_both_time == total_zero_time + total_one_time)

    # Integer seconds spent at 0 should be 300 minutes * 60 = 18000 seconds
    total_zero_time_seconds = total_zero_time.total_seconds()
    assert(total_zero_time_seconds == 10*30*60)

    # Inverting bouts within a period
    # Since the file is 0s and 1s, the total time - the time spent @ 0 should = time spent @ 1
    not_zero_bouts = Bout.time_period_minus_bouts((counts.timestamps[0],counts.timestamps[-1]+timedelta(minutes=1)), zero_bouts)
    total_not_zero_time = Bout.total_time(not_zero_bouts)
    assert(total_not_zero_time == total_one_time)
Example #9
def infer_nonwear_actigraph(counts, zero_minutes=timedelta(minutes=60)):
    """Given an Actigraph counts signal, infer nonwear as consecutive zeros of a given duration. """

    # List all bouts where the signal was <= 0
    nonwear_bouts = counts.bouts(-999999, 0)

    # Limit those bouts to the minimum duration specified in "zero_minutes"
    nonwear_bouts = Bout.limit_to_lengths(nonwear_bouts, min_length=zero_minutes)

    # Invert the nonwear bouts to get wear bouts
    wear_bouts = Bout.time_period_minus_bouts([counts.timeframe[0], counts.timeframe[1]], nonwear_bouts)

    return nonwear_bouts, wear_bouts
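
A typical follow-up, mirroring the tests on this page (a sketch; `counts` and the 90-minute threshold are assumptions): mask the inferred nonwear before computing summaries.

nonwear_bouts, wear_bouts = infer_nonwear_actigraph(counts, zero_minutes=timedelta(minutes=90))
counts.delete_windows(nonwear_bouts)  # deleted epochs count as "missing" in later summaries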
Example #10
def test_f():
    # Case F
    # Multiple deletions producing consistent results

    origin = counts.timestamps[0]

    # Delete first 2 hours
    start = origin
    end = origin + timedelta(hours=2)

    # Summarise the data before deletion
    summary_before = Time_Series.Time_Series("")
    summary_before.add_channels(counts.summary_statistics(
        statistics=[("generic", ["sum", "n", "missing"]),
                    ("cutpoints", [[0, 0], [0, 1], [1, 1]])]))

    counts.delete_windows([Bout.Bout(start, end)])

    # Summarise the data after deletion
    summary_after_a = Time_Series.Time_Series("")
    summary_after_a.add_channels(counts.summary_statistics(
        statistics=[("generic", ["sum", "n", "missing"]),
                    ("cutpoints", [[0, 0], [0, 1], [1, 1]])]))

    # Delete midday to 2pm
    start = origin + timedelta(hours=12)
    end = origin + timedelta(hours=14)

    counts.delete_windows([Bout.Bout(start, end)])

    # Summarise the data after deletion
    summary_after_b = Time_Series.Time_Series("")
    summary_after_b.add_channels(counts.summary_statistics(
        statistics=[("generic", ["sum", "n", "missing"]),
                    ("cutpoints", [[0, 0], [0, 1], [1, 1]])]))

    # 20 hours left
    assert (summary_after_b.get_channel("AG_Counts_n").data[0] == 20 * 60)

    # 4 hours missing
    assert (summary_after_b.get_channel("AG_Counts_missing").data[0] == 4 * 60)

    # Sum should be 20 hours of 1s = 20*60
    assert (summary_after_b.get_channel("AG_Counts_sum").data[0] == 20 * 60)
Example #11
    def window_statistics(self, start_dts, end_dts, statistics):

        window = Bout.Bout(start_dts, end_dts)
        bouts = self.bouts_involved(window)

        output_row = []
        if (len(bouts) > 0):

            for stat in statistics:

                if stat[0] == "generic":

                    for val1 in stat[1]:
                        if val1 == "sum":

                            intersection = Bout.bout_list_intersection([window],bouts)
                            Bout.cache_lengths(intersection)
                            sum_seconds = Bout.total_time(intersection).total_seconds()
                            output_row.append(sum_seconds)

                        elif val1 == "mean":

                            intersection = Bout.bout_list_intersection([window],bouts)
                            Bout.cache_lengths(intersection)
                            sum_seconds = Bout.total_time(intersection).total_seconds()

                            if sum_seconds > 0 and len(bouts) > 0:
                                output_row.append( sum_seconds / len(bouts) )
                            else:
                                output_row.append(0)

                        elif val1 == "n":

                            output_row.append( len(bouts) )

                        else:
                            # Unrecognised statistic; emit -1 so the output row stays aligned
                            print("Unrecognised generic statistic: {} (requested: {})".format(val1, statistics))
                            output_row.append(-1)

                elif stat[0] == "sdx":

                    # ("sdx", [10,20,30,40,50,60,70,80,90])

                    sdx_results = sdx(bouts, stat[1])
                    for r in sdx_results:
                        output_row.append(r)

        else:
            # No bouts in this Bout_Collection overlapping this window
            # There was no data for the time period
            # Output -1 for each missing variable
            for i in range(self.expected_results(statistics)):
                output_row.append(-1)


        return output_row
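
The statistics argument is a list of (domain, spec) tuples. A hedged sketch of a call over one day, assuming `bc` is a Bout_Collection:

from datetime import datetime, timedelta

start = datetime(2000, 1, 1)
row = bc.window_statistics(start, start + timedelta(days=1),
                           [("generic", ["sum", "mean", "n"]), ("sdx", [10, 50, 90])])
# row holds the bout-time sum (seconds), mean, and n, then SD10, SD50 and SD90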
Example #12
def test_a():
    # Case A
    # Both timestamps precede the data

    origin = counts.timestamps[0]

    start = origin - timedelta(days=2)
    end = origin - timedelta(days=1)

    # Summarise the data before deletion
    summary_before = Time_Series.Time_Series("")
    summary_before.add_channels(counts.summary_statistics(
        statistics=[("generic", ["sum", "n", "missing"]),
                    ("cutpoints", [[0, 0], [0, 1], [1, 1]])]))

    counts.delete_windows([Bout.Bout(start, end)])

    # Summarise the data after deletion
    summary_after = Time_Series.Time_Series("")
    summary_after.add_channels(counts.summary_statistics(
        statistics=[("generic", ["sum", "n", "missing"]),
                    ("cutpoints", [[0, 0], [0, 1], [1, 1]])]))

    # All values should be identical, loop through them and assert equality
    suffixes = "sum n missing 0_0 0_1 1_1".split(" ")

    for suffix in suffixes:
        assert (summary_before.get_channel("AG_Counts_" + suffix).data[0] ==
                summary_after.get_channel("AG_Counts_" + suffix).data[0])
Example #13
    def piecewise_statistics(self, window_size, statistics=[("generic", ["mean"])], time_period=False, name=""):

        if time_period is False:
            # Default to whole days: midnight before the first timestamp through the last microsecond of the final day
            start = self.timeframe[0] - timedelta(hours=self.timeframe[0].hour, minutes=self.timeframe[0].minute, seconds=self.timeframe[0].second, microseconds=self.timeframe[0].microsecond)
            end = self.timeframe[1] + timedelta(hours=23-self.timeframe[1].hour, minutes=59-self.timeframe[1].minute, seconds=59-self.timeframe[1].second, microseconds=999999-self.timeframe[1].microsecond)
        else:
            start = time_period[0]
            end = time_period[1]

        # Tile the time period with consecutive windows of the requested size
        windows = []

        start_dts = start
        end_dts = start + window_size

        while start_dts < end:

            window = Bout(start_dts, end_dts)
            windows.append(window)

            start_dts = start_dts + window_size
            end_dts = end_dts + window_size

        return self.build_statistics_channels(windows, statistics, name=name)
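
For example, hourly means over a channel's own timeframe (a sketch; `counts` is an assumed pampro Channel):

hourly_means = counts.piecewise_statistics(timedelta(hours=1), statistics=[("generic", ["mean"])], time_period=counts.timeframe)[0]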
Example #14
    def window_statistics(self, start_dts, end_dts, statistics):

        window = Bout(start_dts, end_dts)
        bouts = self.bouts_involved(window)

        output_row = []
        if (len(bouts) > 0):

            for stat in statistics:

                if stat[0] == "generic":

                    for val1 in stat[1]:
                        if val1 == "sum":

                            intersection = bout_list_intersection([window],bouts)
                            cache_lengths(intersection)
                            sum_seconds = total_time(intersection).total_seconds()
                            output_row.append(sum_seconds)

                        elif val1 == "mean":

                            intersection = bout_list_intersection([window],bouts)
                            cache_lengths(intersection)
                            sum_seconds = total_time(intersection).total_seconds()

                            if sum_seconds > 0 and len(bouts) > 0:
                                output_row.append( sum_seconds / len(bouts) )
                            else:
                                output_row.append(0)

                        elif val1 == "n":

                            output_row.append( len(bouts) )

                        else:
                            # Unrecognised statistic; emit -1 so the output row stays aligned
                            print("Unrecognised generic statistic: {} (requested: {})".format(val1, statistics))
                            output_row.append(-1)

                elif stat[0] == "sdx":

                    # ("sdx", [10,20,30,40,50,60,70,80,90])

                    sdx_results = sdx(bouts, stat[1])
                    for r in sdx_results:
                        output_row.append(r)

        else:
            # No bouts in this Bout_Collection overlapping this window
            # There was no data for the time period
            # Output -1 for each missing variable
            for i in range(self.expected_results(statistics)):
                output_row.append(-1)


        return output_row
Example #15
def produce_binary_channels(bouts, lengths, skeleton_channel):

    Bout.cache_lengths(bouts)
    bouts.sort(key=lambda x: x.length, reverse=True)

    channels = []
    for length in lengths:

        # Drop bouts shorter than the current minimum length (the list is sorted longest-first)
        bouts = Bout.limit_to_lengths(bouts, min_length=length, sorted=True)

        channel_name = "{}_mt{}".format(skeleton_channel.name,length)

        # Clone the blank channel, set data to 1 where time is inside any of the bouts
        skeleton_copy = copy.deepcopy(skeleton_channel)
        chan = Channel.channel_from_bouts(bouts, False, False, channel_name, skeleton=skeleton_copy)
        channels.append(chan)

    return channels
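
A hedged sketch of a call; the cutpoint and bout lengths are illustrative, not prescribed by this code:

mvpa_bouts = counts.bouts(1952, 999999)  # hypothetical MVPA cutpoint for the example
binary_channels = produce_binary_channels(mvpa_bouts, [timedelta(minutes=1), timedelta(minutes=10)], skeleton_channel=counts)
# Each returned channel is 1 inside qualifying bouts and 0 elsewhere; names embed the length, e.g. "AG_Counts_mt0:10:00"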
Example #16
def sdx(bouts, percentages):

    total_time_minutes = Bout.total_time(bouts).total_seconds()/60

    Bout.cache_lengths(bouts)
    bouts.sort(key=lambda x : x.length)

    highest_length_minutes = int(bouts[-1].length.total_seconds()/60)

    targets_minutes = [int((total_time_minutes)/100.0 * percentage) for percentage in percentages]
    results = []

    #print("Number of bouts: ", len(bouts))
    #print("Total time mins: ", total_time_minutes)
    #print("Highest length mins", highest_length_minutes)
    #print(targets_minutes)

    current_target_index = 0
    target_minutes = targets_minutes[current_target_index]
    for length in range(1, highest_length_minutes+1):

        included_bouts = [b for b in bouts if b.length.total_seconds()/60 <= length]
        #print(included_bouts)
        total_included_time_minutes = Bout.total_time(included_bouts).total_seconds()/60

        #print(length, total_included_time_minutes)
        while total_included_time_minutes >= target_minutes:

            #print(">target_minutes", target_minutes)
            #length is the result
            results.append(length)
            current_target_index += 1
            if current_target_index == len(targets_minutes):
                target_minutes = 999999999
            else:
                target_minutes = targets_minutes[current_target_index]

        if current_target_index == len(targets_minutes):
            break

    #print(results)
    return results
Example #17
def infer_still_bouts_triaxial(x, y, z, window_size=timedelta(seconds=10), noise_cutoff_mg=13, minimum_length=timedelta(seconds=10)):

    # Get windows of standard deviation in each axis
    x_std = x.piecewise_statistics(window_size, statistics=[("generic", ["std"])], time_period=x.timeframe)[0]
    y_std = y.piecewise_statistics(window_size, statistics=[("generic", ["std"])], time_period=y.timeframe)[0]
    z_std = z.piecewise_statistics(window_size, statistics=[("generic", ["std"])], time_period=z.timeframe)[0]

    # Find bouts where standard deviation is below threshold for long periods
    x_bouts = x_std.bouts(0, float(noise_cutoff_mg)/1000.0)
    y_bouts = y_std.bouts(0, float(noise_cutoff_mg)/1000.0)
    z_bouts = z_std.bouts(0, float(noise_cutoff_mg)/1000.0)

    x_bouts = Bout.limit_to_lengths(x_bouts, min_length=minimum_length)
    y_bouts = Bout.limit_to_lengths(y_bouts, min_length=minimum_length)
    z_bouts = Bout.limit_to_lengths(z_bouts, min_length=minimum_length)

    # Get the times where those bouts overlap
    x_intersect_y = Bout.bout_list_intersection(x_bouts, y_bouts)
    x_intersect_y_intersect_z = Bout.bout_list_intersection(x_intersect_y, z_bouts)

    return x_intersect_y_intersect_z
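
Sketch usage on raw triaxial channels (hedged; x, y and z are assumed to be already-loaded acceleration channels in g):

still_bouts = infer_still_bouts_triaxial(x, y, z, noise_cutoff_mg=13, minimum_length=timedelta(minutes=1))
print("Total still time:", Bout.total_time(still_bouts))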
Example #18
def test_bouts():

    one_bouts = counts.bouts(1, 1)

    # There are 7 bouts
    assert (len(one_bouts) == 7)

    # Their length is 1, 2, 3, .. 7
    assert (Bout.total_time(one_bouts) == timedelta(minutes=7 + 6 + 5 + 4 + 3 +
                                                    2 + 1))

    # Keeping bouts >= i minutes means there should be 7-(i-1) left
    for i in range(1, 7):
        i_or_longer = Bout.limit_to_lengths(one_bouts,
                                            min_length=timedelta(minutes=i))
        assert (len(i_or_longer) == 7 - (i - 1))

    # One manual check
    three_or_longer = Bout.limit_to_lengths(one_bouts,
                                            min_length=timedelta(minutes=3))
    assert (len(three_or_longer) == 5)

    # This should exclude the 1 bout at exactly 3 minutes
    three_plus_bit_or_longer = Bout.limit_to_lengths(one_bouts,
                                                     min_length=timedelta(
                                                         minutes=3, seconds=1))
    assert (len(three_plus_bit_or_longer) == 4)

    # No bouts should be this long
    eight_or_longer = Bout.limit_to_lengths(one_bouts,
                                            min_length=timedelta(minutes=8))
    assert (len(eight_or_longer) == 0)

    # There is nothing above 1 in the file, should be 0 bouts
    two_bouts = counts.bouts(2, 989)
    assert (len(two_bouts) == 0)
Example #19
def test_nonwear_positions():

    # Case 1: Nonwear at very beginning of file
    ts1, header1 = data_loading.load(os.path.abspath(__file__).replace(os.path.basename(__file__), "") + "_data/testfile23.dat", "Actigraph", datetime_format="%d/%m/%Y")
    counts1 = ts1.get_channel("AG_Counts")
    nonwear_bouts1, wear_bouts1 = channel_inference.infer_nonwear_actigraph(counts1)

    # Case 2: Nonwear in middle of file
    ts2, header2 = data_loading.load(os.path.abspath(__file__).replace(os.path.basename(__file__), "") + "_data/testfile24.dat", "Actigraph", datetime_format="%d/%m/%Y")
    counts2 = ts2.get_channel("AG_Counts")
    nonwear_bouts2, wear_bouts2 = channel_inference.infer_nonwear_actigraph(counts2)

    # Case 3: Nonwear at very end of file
    ts3, header3 = data_loading.load(os.path.abspath(__file__).replace(os.path.basename(__file__), "") + "_data/testfile25.dat", "Actigraph", datetime_format="%d/%m/%Y")
    counts3 = ts3.get_channel("AG_Counts")
    nonwear_bouts3, wear_bouts3 = channel_inference.infer_nonwear_actigraph(counts3)

    # They should all have the same duration of wear & nonwear
    assert(Bout.total_time(nonwear_bouts1) == timedelta(hours=2))
    assert(Bout.total_time(nonwear_bouts1) == Bout.total_time(nonwear_bouts2))
    assert(Bout.total_time(nonwear_bouts1) == Bout.total_time(nonwear_bouts3))
    assert(Bout.total_time(wear_bouts1) == Bout.total_time(wear_bouts2))
    assert(Bout.total_time(wear_bouts1) == Bout.total_time(wear_bouts3))

    # Delete the relevant nonwear bouts from each channel
    counts1.delete_windows(nonwear_bouts1)
    counts2.delete_windows(nonwear_bouts2)
    counts3.delete_windows(nonwear_bouts3)

    # Total data should be equal
    assert(sum(counts1.data) == sum(counts2.data))
    assert(sum(counts1.data) == sum(counts3.data))

    # Summary level mean should also be the same
    s1 = counts1.summary_statistics()[0]
    s2 = counts2.summary_statistics()[0]
    s3 = counts3.summary_statistics()[0]
    assert(s1.data[0] == s2.data[0])
    assert(s1.data[0] == s3.data[0])
Example #20
def infer_nonwear_triaxial(x, y, z, minimum_length=timedelta(hours=1), noise_cutoff_mg=13, return_nonwear_binary=False):

    ''' Use the 3 channels of triaxial acceleration to infer periods of nonwear '''

    # Get an exhaustive list of bouts where the monitor was still
    x_intersect_y_intersect_z = infer_still_bouts_triaxial(x,y,z, noise_cutoff_mg=noise_cutoff_mg, minimum_length=minimum_length)

    # Restrict those bouts to only those with a length that exceeds the minimum length criterion
    x_intersect_y_intersect_z = Bout.limit_to_lengths(x_intersect_y_intersect_z, min_length=minimum_length)

    # Legacy code - probably going to delete this
    if return_nonwear_binary:
        # Create a parallel, binary channel indicating if that time point was in or out of wear
        nonwear_binary = Channel.channel_from_bouts(x_intersect_y_intersect_z, x.timeframe, False, "nonwear", skeleton=x)

        return (x_intersect_y_intersect_z, nonwear_binary)
    else:
        return x_intersect_y_intersect_z
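
A hedged sketch of the common pattern, as in the processing scripts further down this page: infer nonwear once, then mask it in each axis.

nonwear_bouts = infer_nonwear_triaxial(x, y, z, minimum_length=timedelta(hours=1))
for chan in (x, y, z):
    chan.delete_windows(nonwear_bouts)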
Example #21
def test_b():
    # Case B
    # First timestamp precedes the data, second doesn't

    origin = counts.timestamps[0]

    start = origin - timedelta(hours=12)
    end = origin + timedelta(hours=12)

    # Summarise the data before deletion
    summary_before = Time_Series.Time_Series("")
    summary_before.add_channels(counts.summary_statistics(
        statistics=[("generic", ["sum", "n", "missing"]),
                    ("cutpoints", [[0, 0], [0, 1], [1, 1]])]))

    counts.delete_windows([Bout.Bout(start, end)])

    # Summarise the data after deletion
    summary_after = Time_Series.Time_Series("")
    summary_after.add_channels(counts.summary_statistics(
        statistics=[("generic", ["sum", "n", "missing"]),
                    ("cutpoints", [[0, 0], [0, 1], [1, 1]])]))

    # n should go down and missing should go up
    assert (summary_before.get_channel("AG_Counts_n").data[0] >
            summary_after.get_channel("AG_Counts_n").data[0])
    assert (summary_before.get_channel("AG_Counts_missing").data[0] <
            summary_after.get_channel("AG_Counts_missing").data[0])

    # Should only be 12 hours left
    assert (summary_after.get_channel("AG_Counts_n").data[0] == 12 * 60)

    # And 12 hours missing
    assert (summary_after.get_channel("AG_Counts_missing").data[0] == 12 * 60)
Example #22
def test_c():
    # Case C
    # Both timestamps inside data
    origin = counts.timestamps[0]

    start = origin + timedelta(hours=6)
    end = origin + timedelta(hours=7)

    # Summarise the data before deletion
    summary_before = Time_Series.Time_Series("")
    summary_before.add_channels(counts.summary_statistics(
        statistics=[("generic", ["sum", "n", "missing"]),
                    ("cutpoints", [[0, 0], [0, 1], [1, 1]])]))

    counts.delete_windows([Bout.Bout(start, end)])

    # Summarise the data after deletion
    summary_after = Time_Series.Time_Series("")
    summary_after.add_channels(counts.summary_statistics(
        statistics=[("generic", ["sum", "n", "missing"]),
                    ("cutpoints", [[0, 0], [0, 1], [1, 1]])]))

    # n should go down and missing should go up
    assert (summary_before.get_channel("AG_Counts_n").data[0] >
            summary_after.get_channel("AG_Counts_n").data[0])
    assert (summary_before.get_channel("AG_Counts_missing").data[0] <
            summary_after.get_channel("AG_Counts_missing").data[0])

    # Should only be 23 hours left
    assert (summary_after.get_channel("AG_Counts_n").data[0] == 23 * 60)

    # And 1 hour missing
    assert (summary_after.get_channel("AG_Counts_missing").data[0] == 1 * 60)
Example #23
def calibrate(x,y,z, allow_overwrite=True, budget=1000, noise_cutoff_mg=13):
    """ Use still bouts in the given triaxial data to calibrate it and return the calibrated channels """

    calibration_diagnostics = OrderedDict()

    vm = channel_inference.infer_vector_magnitude(x,y,z)

    # Get a list of bouts where standard deviation in each axis is below given threshold ("still")
    still_bouts = channel_inference.infer_still_bouts_triaxial(x,y,z, noise_cutoff_mg=noise_cutoff_mg, minimum_length=timedelta(minutes=1))
    num_still_bouts = len(still_bouts)
    num_still_seconds = Bout.total_time(still_bouts).total_seconds()

    # Summarise VM in 10s intervals
    vm_windows = vm.piecewise_statistics(timedelta(seconds=10), [("generic", ["mean"])], time_period=vm.timeframe)[0]

    # Get a list where VM was between 0.5 and 1.5g ("reasonable")
    reasonable_bouts = vm_windows.bouts(0.5, 1.5)
    num_reasonable_bouts = len(reasonable_bouts)
    num_reasonable_seconds = Bout.total_time(reasonable_bouts).total_seconds()

    # We only want still bouts where the VM level was within 0.5g of 1g
    # Therefore intersect "still" time with "reasonable" time
    still_bouts = Bout.bout_list_intersection(reasonable_bouts, still_bouts)

    # And we only want bouts where it was still and reasonable for 10s or longer
    still_bouts = Bout.limit_to_lengths(still_bouts, min_length = timedelta(seconds=10))
    num_final_bouts = len(still_bouts)
    num_final_seconds = Bout.total_time(still_bouts).total_seconds()

    # Get the average X,Y,Z for each still bout (inside which, by definition, XYZ should not change)
    still_x, num_samples = x.build_statistics_channels(still_bouts, [("generic", ["mean", "n"])])
    still_y = y.build_statistics_channels(still_bouts, [("generic", ["mean"])])[0]
    still_z = z.build_statistics_channels(still_bouts, [("generic", ["mean"])])[0]

    # Get the octant positions of the points to calibrate on
    occupancy = octant_occupancy(still_x.data, still_y.data, still_z.data)

    # Are they fairly distributed?
    comparisons = {"x<0":[0,1,2,3], "x>0":[4,5,6,7], "y<0":[0,1,4,5], "y>0":[2,3,6,7], "z<0":[0,2,4,6], "z>0":[1,3,5,7]}
    for axis in ["x", "y", "z"]:
        mt = sum(occupancy[comparisons[axis + ">0"]])
        lt = sum(occupancy[comparisons[axis + "<0"]])
        calibration_diagnostics[axis + "_inequality"] = abs(mt-lt)/sum(occupancy)

    # Calculate the initial error without doing any calibration
    start_error = evaluate_solution(still_x, still_y, still_z, num_samples, [0,1,0,1,0,1])

    # Do offset and scale calibration by default
    offset_only_calibration = False
    calibration_diagnostics["calibration_method"] = "offset and scale"

    # If we have less than 500 points to calibrate with, or if more than 2 octants are empty
    if len(still_x.data) < 500 or sum(occupancy == 0) > 2:
        offset_only_calibration = True
        calibration_diagnostics["calibration_method"] = "offset only"


    # Search for the correct way to calibrate the data
    calibration_parameters = find_calibration_parameters(still_x.data, still_y.data, still_z.data, offset_only=offset_only_calibration)

    for param,value in zip("x_offset,x_scale,y_offset,y_scale,z_offset,z_scale".split(","), calibration_parameters):
        calibration_diagnostics[param] = value

    for i,occ in enumerate(occupancy):
        calibration_diagnostics["octant_"+str(i)] = occ

    # Calculate the final error after calibration
    end_error = evaluate_solution(still_x, still_y, still_z, num_samples, calibration_parameters)

    calibration_diagnostics["start_error"] = start_error
    calibration_diagnostics["end_error"] = end_error
    calibration_diagnostics["num_final_bouts"] = num_final_bouts
    calibration_diagnostics["num_final_seconds"] = num_final_seconds
    calibration_diagnostics["num_still_bouts"] = num_still_bouts
    calibration_diagnostics["num_still_seconds"] = num_still_seconds
    calibration_diagnostics["num_reasonable_bouts"] = num_reasonable_bouts
    calibration_diagnostics["num_reasonable_seconds"] = num_reasonable_seconds

    if allow_overwrite:
        # If we do not need to preserve the original x,y,z values, we can just calibrate that data

        # Apply the best calibration factors to the data
        do_calibration(x, y, z, calibration_parameters)

        return (x, y, z, calibration_diagnostics)

    else:
        # Else we create an independent copy of the raw data and calibrate that instead
        cal_x = copy.deepcopy(x)
        cal_y = copy.deepcopy(y)
        cal_z = copy.deepcopy(z)

        # Apply the best calibration factors to the data
        do_calibration(cal_x, cal_y, cal_z, calibration_parameters)

        return (cal_x, cal_y, cal_z, calibration_diagnostics)
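
A hedged end-to-end sketch, assuming x, y and z were loaded from a raw accelerometer file; allow_overwrite=False keeps the originals intact:

cal_x, cal_y, cal_z, diagnostics = calibrate(x, y, z, allow_overwrite=False)
print(diagnostics["calibration_method"])
print("residual error: {} -> {}".format(diagnostics["start_error"], diagnostics["end_error"]))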
Example #24
def process_file(job_details):

    id_num = str(job_details["pid"])
    filename = job_details["filename"]

    filename_short = os.path.basename(filename).split('.')[0]

    meta = os.path.join(results_folder,
                        "metadata_{}.csv".format(filename_short))
    # check if analysis_meta already exists...
    if os.path.isfile(meta):
        os.remove(meta)

    battery_max = 0
    if monitor_type == "GeneActiv":
        battery_max = GA_battery_max
    elif monitor_type == "Axivity":
        battery_max = AX_battery_max

    epochs = [timedelta(minutes=n) for n in epoch_minutes]
    # Use the 'epoch_minutes' variable to create names for the epochs defined above
    names = []
    plots_list = []
    for n in epoch_minutes:
        name = ""
        if n % 60 == 0:  # If the epoch is a multiple of 60, it will be named in hours, e.g. '1h'
            name = "{}h".format(int(n / 60))
        elif n % 60 != 0:  # If the epoch is NOT a multiple of 60, it will be named in seconds, e.g. '15m'
            name = "{}m".format(n)
        names.append(name)
        if n in epoch_plot:
            plots_list.append(name)

    # fast-load the data to identify any anomalies:
    qc_ts, qc_header = data_loading.fast_load(filename, monitor_type)

    qc_channels = qc_ts.get_channels(["X", "Y", "Z"])

    anomalies = diagnostics.diagnose_fix_anomalies(qc_channels,
                                                   discrepancy_threshold=2)

    # Load the data
    ts, header = data_loading.load(filename, monitor_type, compress=False)
    header["processed_file"] = os.path.basename(filename)

    # some monitors have manufacturers parameters applied to them, let's preserve these but rename them:
    var_list = [
        "x_gain", "x_offset", "y_gain", "y_offset", "z_gain", "z_offset",
        "calibration_date"
    ]
    for var in var_list:
        if var in header.keys():
            header[("manufacturers_%s" % var)] = header[var]
            header.pop(var)

    x, y, z, battery, temperature, integrity = ts.get_channels(
        ["X", "Y", "Z", "Battery", "Temperature", "Integrity"])
    initial_channels = [x, y, z, battery, temperature, integrity]

    # create dictionary of the anomaly total and counts by type
    anomalies_dict = {"QC_anomalies_total": len(anomalies)}

    # check whether any anomalies have been found:
    if len(anomalies) > 0:
        anomalies_file = os.path.join(
            results_folder, "{}_anomalies.csv".format(filename_short))
        df = pd.DataFrame(anomalies)

        for type in anomaly_types:
            anomalies_dict["QC_anomaly_{}".format(type)] = (
                df.anomaly_type.values == type).sum()

        df = df.set_index("anomaly_type")
        # print record of anomalies to anomalies_file
        df.to_csv(anomalies_file)

        # if anomalies have been found, fix these anomalies
        channels = diagnostics.fix_anomalies(anomalies, initial_channels)

    else:
        for type in anomaly_types:
            anomalies_dict["QC_anomaly_{}".format(type)] = 0
        # if no anomalies
        channels = initial_channels

    first_channel = channels[0]
    # Convert timestamps to offsets from the first timestamp
    start, offsets = Channel.timestamps_to_offsets(first_channel.timestamps)

    # As timestamps are sparse, expand them to 1 per observation
    offsets = Channel.interpolate_offsets(offsets, len(first_channel.data))

    # For each channel, convert to offset timestamps
    for c in channels:
        c.start = start
        c.set_contents(c.data, offsets, timestamp_policy="offset")

    # find approximate first and last battery percentage values
    first_battery_pct = round((battery.data[1] / battery_max) * 100, 2)
    last_battery_pct = round((battery.data[-1] / battery_max) * 100, 2)

    # Calculate the time frame to use
    start = time_utilities.start_of_day(x.timeframe[0])
    end = time_utilities.end_of_day(x.timeframe[-1])
    tp = (start, end)

    # if the sampling frequency is greater than 40Hz
    if x.frequency > 40:
        # apply a low pass filter
        x = pampro_fourier.low_pass_filter(x,
                                           20,
                                           frequency=x.frequency,
                                           order=4)
        x.name = "X"  # because LPF^ changes the name, we want to override that
        y = pampro_fourier.low_pass_filter(y,
                                           20,
                                           frequency=y.frequency,
                                           order=4)
        y.name = "Y"
        z = pampro_fourier.low_pass_filter(z,
                                           20,
                                           frequency=z.frequency,
                                           order=4)
        z.name = "Z"

    # find any bouts where data is "missing" BEFORE calibration
    missing_bouts = []
    if -111 in x.data:
        # extract the bouts of the data channels where the data == -111 (the missing value)
        missing = x.bouts(-111, -111)

        # add a buffer of 2 minutes (120 seconds) to the beginning and end of each bout
        for item in missing:

            bout_start = max(item.start_timestamp - timedelta(seconds=120),
                             x.timeframe[0])
            bout_end = min(item.end_timestamp + timedelta(seconds=120),
                           x.timeframe[1])

            new_bout = Bout.Bout(start_timestamp=bout_start,
                                 end_timestamp=bout_end)
            missing_bouts.append(new_bout)


    x.delete_windows(missing_bouts)
    y.delete_windows(missing_bouts)
    z.delete_windows(missing_bouts)
    integrity.fill_windows(missing_bouts, fill_value=1)

    ################ CALIBRATION #######################

    # extract still bouts
    calibration_ts, calibration_header = triaxial_calibration.calibrate_stepone(
        x, y, z, noise_cutoff_mg=noise_cutoff_mg)
    # Calibrate the acceleration to local gravity
    cal_diagnostics = triaxial_calibration.calibrate_steptwo(
        calibration_ts, calibration_header, calibration_statistics=False)

    # calibrate data
    triaxial_calibration.do_calibration(x,
                                        y,
                                        z,
                                        temperature=None,
                                        cp=cal_diagnostics)

    x.delete_windows(missing_bouts)
    y.delete_windows(missing_bouts)
    z.delete_windows(missing_bouts)
    temperature.delete_windows(missing_bouts)
    battery.delete_windows(missing_bouts)

    # Derive some signal features
    vm = channel_inference.infer_vector_magnitude(x, y, z)
    vm.delete_windows(missing_bouts)

    if "HPFVM" in stats:
        vm_hpf = channel_inference.infer_vm_hpf(vm)
    else:
        vm_hpf = None

    if "ENMO" in stats:
        enmo = channel_inference.infer_enmo(vm)
    else:
        enmo = None

    if "PITCH" and "ROLL" in stats:
        pitch, roll = channel_inference.infer_pitch_roll(x, y, z)
    else:
        pitch = roll = None

    # Infer nonwear and mask those data points in the signal
    nonwear_bouts = channel_inference.infer_nonwear_triaxial(
        x, y, z, noise_cutoff_mg=noise_cutoff_mg)
    for bout in nonwear_bouts:
        # Show non-wear bouts in purple
        bout.draw_properties = {'lw': 0, 'alpha': 0.75, 'facecolor': '#764af9'}

    for channel, channel_name in zip(
        [enmo, vm_hpf, pitch, roll, temperature, battery],
        ["ENMO", "HPFVM", "PITCH", "ROLL", "Temperature", "Battery"]):
        if channel_name in stats:
            # Collapse the sample data to a processing epoch (in seconds) so data is summarised
            epoch_level_channel = channel.piecewise_statistics(
                timedelta(seconds=processing_epoch), time_period=tp)[0]
            epoch_level_channel.name = channel_name
            if channel_name in ["Temperature", "Battery"]:
                pass
            else:
                epoch_level_channel.delete_windows(nonwear_bouts)
            epoch_level_channel.delete_windows(missing_bouts)
            ts.add_channel(epoch_level_channel)

    # Collapse the binary integrity channel once, after the per-channel loop above
    epoch_level_channel = integrity.piecewise_statistics(
        timedelta(seconds=int(processing_epoch)),
        statistics=[("binary", ["flag"])],
        time_period=tp)[0]
    epoch_level_channel.name = "Integrity"
    epoch_level_channel.fill_windows(missing_bouts, fill_value=1)
    ts.add_channel(epoch_level_channel)

    # create and open results files
    results_files = [
        os.path.join(results_folder, "{}_{}.csv".format(name, filename_short))
        for name in names
    ]
    files = [open(file, "w") for file in results_files]

    # Write the column headers to the created files
    for f in files:
        f.write(pampro_utilities.design_file_header(stats) + "\n")

    # writing out and plotting results
    for epoch, name, f in zip(epochs, names, files):
        results_ts = ts.piecewise_statistics(epoch,
                                             statistics=stats,
                                             time_period=tp,
                                             name=id_num)
        results_ts.write_channels_to_file(file_target=f)
        f.flush()
        if name in plots_list:
            # for each statistic in the plotting dictionary, produce a plot in the results folder
            for stat, plot in plotting_dict.items():
                try:
                    results_ts[stat].add_annotations(nonwear_bouts)
                    results_ts.draw([[stat]],
                                    file_target=os.path.join(
                                        results_folder,
                                        plot.format(filename_short, name)))
                except KeyError:
                    pass

    header["processing_script"] = version
    header["analysis_resolutions"] = names
    header["noise_cutoff_mg"] = noise_cutoff_mg
    header["processing_epoch"] = processing_epoch
    header["QC_first_battery_pct"] = first_battery_pct
    header["QC_last_battery_pct"] = last_battery_pct

    metadata = {**header, **anomalies_dict, **cal_diagnostics}

    # write metadata to file
    pampro_utilities.dict_write(meta, id_num, metadata)

    for c in ts:
        del c.data
        del c.timestamps
        del c.indices
        del c.cached_indices
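
The entry point expects a job_details mapping with "pid" and "filename" keys; everything else comes from module-level configuration. A minimal, hypothetical invocation:

process_file({"pid": 12345, "filename": "/data/raw/subject_0001.cwa"})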
Example #25
def test_extracted_bouts():

    one_bouts = counts.bouts(1, 1)
    zero_bouts = counts.bouts(0, 0)

    # Bouts where counts == 0 and counts == 1 should be mutually exclusive
    # So there should be no intersections between them
    intersections = Bout.bout_list_intersection(one_bouts, zero_bouts)

    assert (len(intersections) == 0)

    # A bout that spans the whole time period should completely intersect with bouts where counts == 1
    one_big_bout = Bout.Bout(counts.timestamps[0] - timedelta(days=1),
                             counts.timestamps[-1] + timedelta(days=1))

    one_intersections = Bout.bout_list_intersection(one_bouts, [one_big_bout])
    assert (Bout.total_time(one_intersections) == Bout.total_time(one_bouts))

    # Same for zeros
    zero_intersections = Bout.bout_list_intersection(zero_bouts,
                                                     [one_big_bout])
    assert (Bout.total_time(zero_intersections) == Bout.total_time(zero_bouts))

    # Filling in the gaps between the one-bouts should recreate the zero bouts
    inverse_of_one_bouts = Bout.time_period_minus_bouts(
        (counts.timeframe[0], counts.timeframe[1] + timedelta(minutes=1)),
        one_bouts)

    # They should have the same n
    assert (len(inverse_of_one_bouts) == len(zero_bouts))

    # Same total amount of time
    assert (
        Bout.total_time(inverse_of_one_bouts) == Bout.total_time(zero_bouts))
Example #26
def qc_analysis(job_details):

    id_num = str(job_details["pid"])
    filename = job_details["filename"]

    filename_short = os.path.basename(filename).split('.')[0]

    battery_max = 0
    if filetype == "GeneActiv":
        battery_max = GA_battery_max
    elif filetype == "Axivity":
        battery_max = AX_battery_max

    # Load the data from the hdf5 file
    ts, header = data_loading.fast_load(filename, filetype)

    header["QC_filename"] = os.path.basename(filename)

    x, y, z, battery, temperature = ts.get_channels(["X", "Y", "Z", "Battery", "Temperature"])
    
    # create a channel of battery percentage, based on the assumed battery maximum value 
    battery_pct = Channel.Channel.clone(battery)
    battery_pct.data = (battery.data / battery_max) * 100
    
    channels = [x, y, z, battery, temperature, battery_pct]
    
    anomalies = diagnostics.diagnose_fix_anomalies(channels, discrepancy_threshold=2)

    # create dictionary of anomaly types
    anomalies_dict = dict()
                        
    # check whether any anomalies have been found:
    if len(anomalies) > 0:
        anomalies_file = os.path.join(anomalies_folder, "{}_anomalies.csv".format(filename_short))
        df = pd.DataFrame(anomalies)
        
        for type in anomaly_types:
            anomalies_dict["QC_anomaly_{}".format(type)] = (df.anomaly_type.values == type).sum()
        
        df = df.set_index("anomaly_type")
        # print record of anomalies to anomalies_file
        df.to_csv(anomalies_file)
        
    else:
        for type in anomaly_types:
            anomalies_dict["QC_anomaly_{}".format(type)] = 0
        
    # check for axis anomalies
    axes_dict = diagnostics.diagnose_axes(x, y, z, noise_cutoff_mg=13)
    
    axis_anomaly = False
    
    for key, val in axes_dict.items():
        anomalies_dict["QC_{}".format(key)] = val
        if key.endswith("max"):
            if val > axis_max:
                axis_anomaly = True
        elif key.endswith("min"):
            if val < axis_min:
                axis_anomaly = True

    # create a "check battery" flag:
    check_battery = False

    # calculate approximate first and last battery percentages
    first_battery_pct = round(battery_pct.data[1], 2)
    last_battery_pct = round(battery_pct.data[-1], 2)
    header["QC_first_battery_pct"] = first_battery_pct
    header["QC_last_battery_pct"] = last_battery_pct
    
    # calculate lowest battery percentage
    # check if battery_pct has a missing_value; exclude those values if they exist
    if battery_pct.missing_value == "None":
        lowest_battery_pct = min(battery_pct.data)
    else:
        test_array = np.delete(battery_pct.data, np.where(battery_pct.data == battery_pct.missing_value))
        lowest_battery_pct = min(test_array)
    
    header["QC_lowest_battery_pct"] = round(lowest_battery_pct,2)
    header["QC_lowest_battery_threshold"] = battery_minimum
        
    # find the maximum battery discharge in any 24hr period:    
    max_discharge = battery_pct.channel_max_decrease(time_period=timedelta(hours=discharge_hours))
    header["QC_max_discharge"] = round(max_discharge, 2)
    header["QC_discharge_time_period"] = "{} hours".format(discharge_hours)
    header["QC_discharge_threshold"] = discharge_pct

    # change flag if lowest battery percentage dips below battery_minimum at any point 
    # OR maximum discharge greater than discharge_pct over time period "hours = discharge_hours"
    if lowest_battery_pct < battery_minimum or max_discharge > discharge_pct:
        check_battery = True
        
    header["QC_check_battery"] = str(check_battery)
    header["QC_axis_anomaly"] = str(axis_anomaly)

    # Calculate the time frame to use
    start = time_utilities.start_of_day(x.timeframe[0])
    end = time_utilities.end_of_day(x.timeframe[-1])
    tp = (start, end)

    results_ts = Time_Series.Time_Series("")

    # Derive some signal features
    vm = channel_inference.infer_vector_magnitude(x, y, z)
    enmo = channel_inference.infer_enmo(vm)
    enmo.minimum = 0
    enmo.maximum = enmo_max

    # Infer nonwear
    nonwear_bouts = channel_inference.infer_nonwear_for_qc(x, y, z, noise_cutoff_mg=noise_cutoff_mg)
    # Use nonwear bouts to calculate wear bouts
    wear_bouts = Bout.time_period_minus_bouts(enmo.timeframe, nonwear_bouts)

    # Use wear bouts to calculate the amount of wear time in the file in hours, save to meta data
    total_wear = Bout.total_time(wear_bouts)
    total_seconds_wear = total_wear.total_seconds()
    total_hours_wear = round(total_seconds_wear/3600)
    header["QC_total_hours_wear"] = total_hours_wear

    # Split the enmo channel into lists of bouts for each quadrant:
    ''' quadrant_0 = 00:00 -> 06:00
        quadrant_1 = 06:00 -> 12:00
        quadrant_2 = 12:00 -> 18:00
        quadrant_3 = 18:00 -> 00:00 '''
    q_0, q_1, q_2, q_3 = channel_inference.create_quadrant_bouts(enmo)

    # calculate the intersection of each set of bouts with wear_bouts, then calculate the wear time in each quadrant.
    sum_quadrant_wear = 0
    for quadrant, name1, name2 in ([q_0, "QC_hours_wear_quadrant_0", "QC_pct_wear_quadrant_0"],
                                   [q_1, "QC_hours_wear_quadrant_1", "QC_pct_wear_quadrant_1"],
                                   [q_2, "QC_hours_wear_quadrant_2", "QC_pct_wear_quadrant_2"],
                                   [q_3, "QC_hours_wear_quadrant_3", "QC_pct_wear_quadrant_3"]):
        quadrant_wear = Bout.bout_list_intersection(quadrant, wear_bouts)
        seconds_wear = Bout.total_time(quadrant_wear).total_seconds()
        hours_wear = round(seconds_wear / 3600)
        header[name1] = hours_wear
        header[name2] = round(((hours_wear / total_hours_wear) * 100), 2)

    for bout in nonwear_bouts:
        # Show non-wear bouts in purple
        bout.draw_properties = {'lw': 0, 'alpha': 0.75, 'facecolor': '#764af9'}

    for channel, channel_name in zip([enmo, battery_pct],["ENMO", "Battery_percentage"]):
        channel.name = channel_name
        results_ts.add_channel(channel)

    if PLOT == "YES":    
        # Plot statistics as subplots in one plot file per data file
        results_ts["ENMO"].add_annotations(nonwear_bouts)
        results_ts.draw_qc(plotting_df, file_target=os.path.join(charts_folder,"{}_plots.png".format(filename_short)))

    header["QC_script"] = version
    
    # file of metadata from qc process
    qc_output = os.path.join(results_folder, "qc_meta_{}.csv".format(filename_short))
    # check if qc_output already exists...
    if os.path.isfile(qc_output):
        os.remove(qc_output)
    
    metadata = {**header, **anomalies_dict}
    
    # write metadata to file
    pampro_utilities.dict_write(qc_output, id_num, metadata)

    for c in ts:
        del c.data
        del c.timestamps
        del c.indices
        del c.cached_indices