import gzip

import numpy as np
import pandas as pd

# accUtils provides formatNum() and the DAYS list; in the
# biobankAccelerometerAnalysis package it is importable as below
from accelerometer import accUtils


def storeCalibrationParams(summary, xOff, yOff, zOff, xSlope, ySlope, zSlope,
        xTemp, yTemp, zTemp, meanTemp):
    """Store calibration parameters to output summary dictionary

    :param dict summary: Output dictionary containing all summary metrics
    :param float xOff: x intercept
    :param float yOff: y intercept
    :param float zOff: z intercept
    :param float xSlope: x slope
    :param float ySlope: y slope
    :param float zSlope: z slope
    :param float xTemp: x temperature coefficient
    :param float yTemp: y temperature coefficient
    :param float zTemp: z temperature coefficient
    :param float meanTemp: Calibration mean temperature in file

    :return: Calibration summary values written to dict <summary>
    :rtype: void
    """

    # store output to summary dictionary
    summary['calibration-xOffset(g)'] = accUtils.formatNum(xOff, 4)
    summary['calibration-yOffset(g)'] = accUtils.formatNum(yOff, 4)
    summary['calibration-zOffset(g)'] = accUtils.formatNum(zOff, 4)
    summary['calibration-xSlope(g)'] = accUtils.formatNum(xSlope, 4)
    summary['calibration-ySlope(g)'] = accUtils.formatNum(ySlope, 4)
    summary['calibration-zSlope(g)'] = accUtils.formatNum(zSlope, 4)
    summary['calibration-xTemp(C)'] = accUtils.formatNum(xTemp, 4)
    summary['calibration-yTemp(C)'] = accUtils.formatNum(yTemp, 4)
    summary['calibration-zTemp(C)'] = accUtils.formatNum(zTemp, 4)
    summary['calibration-meanDeviceTemp(C)'] = accUtils.formatNum(meanTemp, 2)
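
# A minimal usage sketch for the helper above; the calibration numbers below
# are made up for illustration and assume the accUtils import at the top.
summaryDemo = {}
storeCalibrationParams(summaryDemo,
                       0.013, -0.002, 0.041,      # x/y/z offsets (g)
                       1.001, 0.998, 1.003,       # x/y/z slopes
                       0.0002, -0.0001, 0.0003,   # x/y/z temperature coefficients
                       23.5)                      # mean device temperature (C)
print(summaryDemo['calibration-xOffset(g)'])  # 0.013
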
def calculateECDF(e, inputCol, summary):
    """Calculate activity intensity empirical cumulative distribution

    The input data must not be pre-imputed, as the ECDF requires its own
    imputation in which nan/non-wear data segments are IMPUTED FOR EACH
    INTENSITY LEVEL: missing values are replaced by the average of similar
    time-of-day values, at one-minute granularity, from the other days of
    the measurement. The following intensity levels are calculated:
    1mg bins from 1-20mg
    5mg bins from 25-100mg
    25mg bins from 125-500mg
    100mg bins from 600-2000mg

    :param pandas.DataFrame e: Pandas dataframe of epoch data
    :param str inputCol: Column to calculate intensity distribution on
    :param dict summary: Output dictionary containing all summary metrics

    :return: Write dict <summary> keys '<inputCol>-ecdf-<level...>mg'
    :rtype: void
    """

    ecdf1 = np.linspace(1, 20, 20)      # 1mg bins from 1-20mg
    ecdf2 = np.linspace(25, 100, 16)    # 5mg bins from 25-100mg
    ecdf3 = np.linspace(125, 500, 16)   # 25mg bins from 125-500mg
    ecdf4 = np.linspace(600, 2000, 15)  # 100mg bins from 600-2000mg
    ecdfXVals = np.concatenate([ecdf1, ecdf2, ecdf3, ecdf4])

    # remove NaN rows before computing the per-level indicator columns
    ecdfData = e[['hour', 'minute', inputCol]][~np.isnan(e[inputCol])]
    if len(ecdfData) > 0:
        # set column names for actual, imputed, and adjusted intensity dist. vals
        cols = []
        colsImputed = []
        colsAdjusted = []
        for xVal in ecdfXVals:
            col = 'ecdf' + str(xVal)
            cols.append(col)
            colsImputed.append(col + 'Imputed')
            colsAdjusted.append(col + 'Adjusted')
            ecdfData[col] = (ecdfData[inputCol] <= xVal) * 1.0
        # calculate imputation values to replace nan metric values
        wearTimeWeights = ecdfData.groupby(['hour', 'minute'])[cols].mean()
        ecdfData = ecdfData.join(wearTimeWeights, on=['hour', 'minute'],
                                 rsuffix='Imputed')
        # for each ecdf xVal column, apply missing data imputation
        for col, imputed, adjusted in zip(cols, colsImputed, colsAdjusted):
            ecdfData[adjusted] = ecdfData[col].fillna(ecdfData[imputed])

        accEcdf = ecdfData[colsAdjusted].mean()
    else:
        accEcdf = pd.Series(data=[0.0 for i in ecdfXVals],
                            index=[str(i)+'Adjusted' for i in ecdfXVals])
    
    # and write to summary dict
    for x, ecdf in zip(ecdfXVals, accEcdf):
        summary[inputCol + '-ecdf-' + str(accUtils.formatNum(x, 0)) + 'mg'] = \
            accUtils.formatNum(ecdf, 4)
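
# A small end-to-end sketch on synthetic epoch data (assumes the numpy, pandas
# and accUtils imports at the top; 'acc' is a hypothetical mg-scale column):
# two days of 1-minute epochs, with one missing hour to exercise imputation.
rng = np.random.default_rng(42)
idx = pd.date_range('2020-01-01', periods=2 * 1440, freq='min')
eDemo = pd.DataFrame({'acc': rng.exponential(30.0, len(idx))}, index=idx)
eDemo['hour'] = eDemo.index.hour
eDemo['minute'] = eDemo.index.minute
eDemo.loc[eDemo.index[:60], 'acc'] = np.nan  # simulate an hour of non-wear
summaryDemo = {}
calculateECDF(eDemo, 'acc', summaryDemo)
print(len([k for k in summaryDemo if '-ecdf-' in k]))  # 67 intensity levels
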
def get_interrupts(e, epochPeriod, summary):
    """Identify if there are interrupts in the data recording

    :param pandas.DataFrame e: Pandas dataframe of epoch data
    :param int epochPeriod: Size of epoch time window (in seconds)
    :param dict summary: Output dictionary containing all summary metrics

    :return: Write dict <summary> keys 'errs-interrupts-num' & 'errs-interrupt-mins'
    :rtype: void
    """

    e.dropna(subset=['enmoTrunc', 'xStd', 'yStd', 'zStd'],
             how='all',
             inplace=True)
    epochNs = epochPeriod * np.timedelta64(1, 's')
    interrupts = np.where(np.diff(np.array(e.index)) > epochNs)[0]
    # get duration of each interrupt in minutes
    interruptMins = []
    for i in interrupts:
        interruptMins.append(
            np.diff(np.array(e[i:i + 2].index)) / np.timedelta64(1, 'm'))
    # record to output summary
    summary['errs-interrupts-num'] = len(interruptMins)
    summary['errs-interrupt-mins'] = accUtils.formatNum(
        np.sum(interruptMins), 1)
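
# Sketch: a 30s-epoch index with one gap; the function should report a single
# interrupt of 5.5 minutes (column values are dummies so dropna keeps all rows).
idx = pd.date_range('2020-01-01 00:00', periods=10, freq='30s')
idx = idx.append(pd.date_range('2020-01-01 00:10', periods=10, freq='30s'))
eDemo = pd.DataFrame({'enmoTrunc': 0.01, 'xStd': 0.0, 'yStd': 0.0, 'zStd': 0.0},
                     index=idx)
summaryDemo = {}
get_interrupts(eDemo, 30, summaryDemo)
print(summaryDemo)  # errs-interrupts-num: 1, errs-interrupt-mins: 5.5
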
Example #4
def get_interrupts(e, epochPeriod, summary):
    """Identify if there are interrupts in the data recording

    :param pandas.DataFrame e: Pandas dataframe of epoch data
    :param int epochPeriod: Size of epoch time window (in seconds)
    :param dict summary: Output dictionary containing all summary metrics

    :return: Write dict <summary> keys 'errs-interrupts-num' & 'errs-interrupt-mins'
    :rtype: void
    """

    epochNs = epochPeriod * np.timedelta64(1, 's')
    # to_series() keeps the timezone by default in current pandas
    # (the old keep_tz argument has been removed)
    interrupts = np.where(e.index.to_series().diff() > epochNs)[0]
    # Get duration of each interrupt in minutes
    interruptMins = []
    for i in interrupts:
        interruptMins.append(
            (e.index[i] - e.index[i - 1]) / np.timedelta64(1, 'm'))
    # Record to output summary
    summary['errs-interrupts-num'] = len(interruptMins)
    summary['errs-interrupt-mins'] = accUtils.formatNum(np.sum(interruptMins), 1)

    # Re-insert the missing timestamps as empty rows so the index is contiguous
    frames = [e]
    for i in interrupts:
        start, end = e.index[i - 1:i + 1]
        dti = pd.date_range(start=start, end=end, freq=str(epochPeriod) + 's')[1:-1]
        # to_frame() stores the timestamps in column 0; drop it so only the
        # index (with all-NaN data columns) is concatenated in
        frames.append(dti.to_frame().drop(columns=0))
    e = pd.concat(frames).sort_index()

    return e
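
# Sketch for this variant: same kind of gappy 30s index as the earlier example;
# besides writing the summary keys it returns the frame with the missing
# epochs restored as NaN rows.
idx = pd.date_range('2020-01-01 00:00', periods=10, freq='30s')
idx = idx.append(pd.date_range('2020-01-01 00:10', periods=10, freq='30s'))
eDemo = pd.DataFrame({'enmoTrunc': 0.01, 'xStd': 0.0, 'yStd': 0.0, 'zStd': 0.0},
                     index=idx)
eFilled = get_interrupts(eDemo, 30, {})
print(len(eDemo), '->', len(eFilled))  # 20 -> 30: ten missing epochs re-inserted
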
Example #5
def get_interrupts(e, epochPeriod, summary):
    """Identify if there are interrupts in the data recording

    :param pandas.DataFrame e: Pandas dataframe of epoch data
    :param int epochPeriod: Size of epoch time window (in seconds)
    :param dict summary: Output dictionary containing all summary metrics

    :return: Write dict <summary> keys 'errs-interrupts-num' & 'errs-interrupt-mins'
    :rtype: void
    """

    epochNs = epochPeriod * np.timedelta64(1, 's')
    interrupts = np.where(np.diff(np.array(e.index)) > epochNs)[0]
    # get duration of each interrupt in minutes
    interruptMins = []
    for i in interrupts:
        interruptMins.append(
            np.diff(np.array(e[i:i + 2].index)) / np.timedelta64(1, 'm'))
    # record to output summary
    summary['errs-interrupts-num'] = len(interruptMins)
    summary['errs-interrupt-mins'] = accUtils.formatNum(
        np.sum(interruptMins), 1)

    # Re-insert the missing timestamps as empty rows. DataFrame.append was
    # removed in pandas 2.0, so collect the gap frames and use pd.concat
    gapFrames = [e]
    for i in interrupts:
        start, end = e[i:i + 2].index
        dti = pd.date_range(start=start, end=end,
                            freq=str(epochPeriod) + 's')[1:-1]
        gapFrames.append(dti.to_frame().drop(columns=0))
    e = pd.concat(gapFrames).sort_index()

    return e
Example #6
def writeMovementSummaries(e, labels, summary, useRecommendedImputation):
    """Write overall summary stats for each activity type to summary dict

    :param pandas.DataFrame e: Pandas dataframe of epoch data
    :param list(str) labels: Activity state labels
    :param dict summary: Output dictionary containing all summary metrics
    :param bool useRecommendedImputation: Highly recommended method to impute
        missing data using data from other days around the same time

    :return: Write dict <summary> keys for each activity type 'overall-<avg/sd>',
        'week<day/end>-avg', '<day..>-avg', 'hourOfDay-<hr..>-avg',
        'hourOfWeek<day/end>-<hr..>-avg'
    :rtype: void
    """

    # Identify activity types to summarise
    activityTypes = ['acc', 'CutPointMVPA', 'CutPointVPA']
    activityTypes += labels
    if 'MET' in e.columns:
        activityTypes.append('MET')

    # Summarise each type by: overall, week day/end, day, and hour of day
    for accType in activityTypes:
        col = accType
        if useRecommendedImputation:
            col += 'Imputed'
        # cut-point derived columns are never imputed
        if accType in ['CutPointMVPA', 'CutPointVPA']:
            col = accType

        # Overall / weekday / weekend summaries
        summary[accType + '-overall-avg'] = accUtils.formatNum(
            e[col].mean(), 5)
        summary[accType + '-overall-sd'] = accUtils.formatNum(e[col].std(), 2)
        summary[accType + '-weekday-avg'] = accUtils.formatNum(
            e[col][e.index.weekday <= 4].mean(), 2)
        summary[accType + '-weekend-avg'] = accUtils.formatNum(
            e[col][e.index.weekday >= 5].mean(), 2)

        # Daily summary
        for i, day in zip(range(0, 7), accUtils.DAYS):
            summary[accType + '-' + day + '-avg'] = accUtils.formatNum(
                e[col][e.index.weekday == i].mean(), 2)

        # Hourly summaries
        for i in range(0, 24):
            hourOfDay = accUtils.formatNum(e[col][e.index.hour == i].mean(), 2)
            hourOfWeekday = accUtils.formatNum(
                e[col][(e.index.weekday <= 4) & (e.index.hour == i)].mean(), 2)
            hourOfWeekend = accUtils.formatNum(
                e[col][(e.index.weekday >= 5) & (e.index.hour == i)].mean(), 2)
            # Write derived hourly values to summary dictionary
            summary[accType + '-hourOfDay-' + str(i) + '-avg'] = hourOfDay
            summary[accType + '-hourOfWeekday-' + str(i) +
                    '-avg'] = hourOfWeekday
            summary[accType + '-hourOfWeekend-' + str(i) +
                    '-avg'] = hourOfWeekend
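
# Sketch on synthetic epoch data: one full week of 30s epochs with hypothetical
# '<type>' / '<type>Imputed' columns matching the naming this function expects.
idx = pd.date_range('2020-01-06', periods=7 * 2880, freq='30s')  # Mon-Sun
eDemo = pd.DataFrame(index=idx)
for c in ['acc', 'accImputed', 'CutPointMVPA', 'CutPointVPA',
          'walking', 'walkingImputed']:
    eDemo[c] = np.random.rand(len(idx))
summaryDemo = {}
writeMovementSummaries(eDemo, ['walking'], summaryDemo,
                       useRecommendedImputation=True)
print(summaryDemo['acc-overall-avg'], summaryDemo['walking-weekend-avg'])
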
def storeCalibrationInformation(summary, bestIntercept, bestSlope,
        bestTemp, meanTemp, initError, bestError, xMin, xMax, yMin, yMax, zMin,
        zMax, nStatic, calibrationSphereCriteria = 0.3):
    """Store calibration information to output summary dictionary

    :param dict summary: Output dictionary containing all summary metrics
    :param list(float) bestIntercept: Best x/y/z intercept values
    :param list(float) bestSlope: Best x/y/z slope values
    :param list(float) bestTemp: Best x/y/z temperature coefficient values
    :param float meanTemp: Calibration mean temperature in file
    :param float initError: Root mean square error (in g) before calibration
    :param float bestError: Root mean square error (in g) after calibration
    :param float xMin: xMin information on spread of stationary points
    :param float xMax: xMax information on spread of stationary points
    :param float yMin: yMin information on spread of stationary points
    :param float yMax: yMax information on spread of stationary points
    :param float zMin: zMin information on spread of stationary points
    :param float zMax: zMax information on spread of stationary points
    :param int nStatic: number of stationary points used for calibration
    :param float calibrationSphereCriteria: Threshold to check how well file was
        calibrated

    :return: Calibration summary values written to dict <summary>
    :rtype: void
    """

    # store output to summary dictionary
    summary['calibration-errsBefore(mg)'] = accUtils.formatNum(initError*1000, 2)
    summary['calibration-errsAfter(mg)'] = accUtils.formatNum(bestError*1000, 2)
    storeCalibrationParams(summary, bestIntercept[0], bestIntercept[1],
            bestIntercept[2], bestSlope[0], bestSlope[1], bestSlope[2],
            bestTemp[0], bestTemp[1], bestTemp[2], meanTemp)
    summary['calibration-numStaticPoints'] = nStatic
    summary['calibration-staticXmin(g)'] = accUtils.formatNum(xMin, 2)
    summary['calibration-staticXmax(g)'] = accUtils.formatNum(xMax, 2)
    summary['calibration-staticYmin(g)'] = accUtils.formatNum(yMin, 2)
    summary['calibration-staticYmax(g)'] = accUtils.formatNum(yMax, 2)
    summary['calibration-staticZmin(g)'] = accUtils.formatNum(zMin, 2)
    summary['calibration-staticZmax(g)'] = accUtils.formatNum(zMax, 2)
    # check how well calibrated file was
    summary['quality-calibratedOnOwnData'] = 1
    summary['quality-goodCalibration'] = 1
    s = calibrationSphereCriteria
    # the stationary points must span at least +/- s g on every axis
    if xMin > -s or xMax < s or yMin > -s or yMax < s or zMin > -s or zMax < s or \
            np.isnan(xMin) or np.isnan(yMin) or np.isnan(zMin):
        summary['quality-goodCalibration'] = 0
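
# Sketch with made-up calibration outputs: stationary points spanning well
# beyond +/-0.3 g on every axis, so the quality check passes.
summaryDemo = {}
storeCalibrationInformation(summaryDemo,
                            bestIntercept=[0.01, -0.02, 0.03],
                            bestSlope=[1.0, 1.0, 1.0],
                            bestTemp=[0.0, 0.0, 0.0], meanTemp=24.0,
                            initError=0.05, bestError=0.003,
                            xMin=-0.9, xMax=0.9, yMin=-0.8, yMax=0.95,
                            zMin=-0.85, zMax=0.9, nStatic=500)
print(summaryDemo['quality-goodCalibration'])  # 1: sphere criteria satisfied
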
def writeMovementSummaries(e, labels, summary):
    """Write overall summary stats for each activity type to summary dict

    :param pandas.DataFrame e: Pandas dataframe of epoch data
    :param list(str) labels: Activity state labels
    :param dict summary: Output dictionary containing all summary metrics

    :return: Write dict <summary> keys for each activity type 'overall-<avg/sd>',
        'week<day/end>-avg', '<day..>-avg', 'hourOfDay-<hr..>-avg', 
        'hourOfWeek<day/end>-<hr..>-avg'
    :rtype: void
    """

    # identify activity types to summarise
    activityTypes = ['acc', 'MVPA', 'VPA']
    activityTypes += labels
    if 'MET' in e.columns:
        activityTypes.append('MET')

    # summarise each type by: overall, week day/end, day, and hour of day
    for accType in activityTypes:
        col = accType + 'Imputed'
        if accType in ['MVPA', 'VPA']:
            col = accType
        
        # overall / weekday / weekend summaries
        summary[accType + '-overall-avg'] = accUtils.formatNum(e[col].mean(), 2)
        summary[accType + '-overall-sd'] = accUtils.formatNum(e[col].std(), 2)
        summary[accType + '-weekday-avg'] = accUtils.formatNum(
            e[col][e.index.weekday <= 4].mean(), 2)
        summary[accType + '-weekend-avg'] = accUtils.formatNum(
            e[col][e.index.weekday >= 5].mean(), 2)

        # daily summary
        for i, day in zip(range(0, 7), accUtils.DAYS):
            summary[accType + '-' + day + '-avg'] = accUtils.formatNum(
                e[col][e.index.weekday == i].mean(), 2)

        # hourly summaries
        for i in range(0, 24):
            hourOfDay = accUtils.formatNum(e[col][e.index.hour == i].mean(), 2)
            hourOfWeekday = accUtils.formatNum(
                e[col][(e.index.weekday <= 4) & (e.index.hour == i)].mean(), 2)
            hourOfWeekend = accUtils.formatNum(
                e[col][(e.index.weekday >= 5) & (e.index.hour == i)].mean(), 2)
            # write derived hourly values to summary dictionary
            summary[accType + '-hourOfDay-' + str(i) + '-avg'] = hourOfDay
            summary[accType + '-hourOfWeekday-' + str(i) + '-avg'] = hourOfWeekday
            summary[accType + '-hourOfWeekend-' + str(i) + '-avg'] = hourOfWeekend
Example #9
def storeCalibrationParams(summary, xyzOff, xyzSlope, xyzTemp, meanTemp):
    """Store calibration parameters to output summary dictionary

    :param dict summary: Output dictionary containing all summary metrics
    :param list(float) xyzOff: intercept [x, y, z]
    :param list(float) xyzSlope: slope [x, y, z]
    :param list(float) xyzTemp: temperature [x, y, z]
    :param float meanTemp: Calibration mean temperature in file

    :return: Calibration summary values written to dict <summary>
    :rtype: void
    """

    # store output to summary dictionary
    summary['calibration-xOffset(g)'] = accUtils.formatNum(xyzOff[0], 4)
    summary['calibration-yOffset(g)'] = accUtils.formatNum(xyzOff[1], 4)
    summary['calibration-zOffset(g)'] = accUtils.formatNum(xyzOff[2], 4)
    summary['calibration-xSlope(g)'] = accUtils.formatNum(xyzSlope[0], 4)
    summary['calibration-ySlope(g)'] = accUtils.formatNum(xyzSlope[1], 4)
    summary['calibration-zSlope(g)'] = accUtils.formatNum(xyzSlope[2], 4)
    summary['calibration-xTemp(C)'] = accUtils.formatNum(xyzTemp[0], 4)
    summary['calibration-yTemp(C)'] = accUtils.formatNum(xyzTemp[1], 4)
    summary['calibration-zTemp(C)'] = accUtils.formatNum(xyzTemp[2], 4)
    summary['calibration-meanDeviceTemp(C)'] = accUtils.formatNum(meanTemp, 2)
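
# Sketch for the list-based variant above (same illustrative numbers as the
# earlier per-axis example):
summaryDemo = {}
storeCalibrationParams(summaryDemo, [0.013, -0.002, 0.041],
                       [1.001, 0.998, 1.003], [0.0002, -0.0001, 0.0003], 23.5)
print(summaryDemo['calibration-meanDeviceTemp(C)'])  # 23.5
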
def get_wear_time_stats(e, epochPeriod, maxStd, minDuration, nonWearFile,
                        summary):
    """Calculate nonWear time, write episodes to file, and return wear statistics

    If a daylight-savings crossover occurs, times after the change are updated
    by +/- 1hr. For the autumn crossover, the last 1hr chunk before the change
    is also removed.

    :param pandas.DataFrame e: Pandas dataframe of epoch data
    :param int epochPeriod: Size of epoch time window (in seconds)
    :param int maxStd: Threshold (in mg units) for stationary vs not
    :param int minDuration: Minimum duration of nonwear events (minutes)
    :param str nonWearFile: Output filename for non wear .csv.gz episodes
    :param dict summary: Output dictionary containing all summary metrics

    :return: Write dict <summary> keys 'wearTime-numNonWearEpisodes(>1hr)',
        'wearTime-overall(days)', 'nonWearTime-overall(days)', 'wearTime-diurnalHrs',
        'wearTime-diurnalMins', 'quality-goodWearTime', 'wearTime-<day...>(hrs)', and
        'wearTime-hourOfDay<hr...>-(hrs)'
    :rtype: void

    :return: Write .csv.gz non wear episodes file to <nonWearFile>
    :rtype: void
    """

    maxStd = maxStd / 1000.0  # the Java epoch extraction uses gravity (g) units, not mg
    e['nw'] = np.where((e['xStd'] < maxStd) & (e['yStd'] < maxStd) &
                       (e['zStd'] < maxStd), 1, 0)
    # episodes start where nw flips 0 -> 1 and end where it flips 1 -> 0
    starts = e.index[(e['nw'] == 1)
                     & (e['nw'].shift(1).fillna(0) == 0)]
    ends = e.index[(e['nw'] == 1)
                   & (e['nw'].shift(-1).fillna(0) == 0)]
    nonWearEpisodes = [(start, end) for start, end in zip(starts, ends)
                       if end > start + np.timedelta64(minDuration, 'm')]

    # set nonWear data to nan and record to nonWearBouts file
    f = gzip.open(nonWearFile, 'wb')
    f.write('start,end,xStdMean,yStdMean,zStdMean\n'.encode())
    timeFormat = '%Y-%m-%d %H:%M:%S'
    for episode in nonWearEpisodes:
        tmp = e[['xStd', 'yStd', 'zStd']][episode[0]:episode[1]]
        nonWearBout = episode[0].strftime(timeFormat) + ','
        nonWearBout += episode[1].strftime(timeFormat) + ','
        nonWearBout += str(tmp['xStd'].mean()) + ','
        nonWearBout += str(tmp['yStd'].mean()) + ','
        nonWearBout += str(tmp['zStd'].mean()) + '\n'
        f.write(nonWearBout.encode())
        # set main dataframe values to nan
        e[episode[0]:episode[1]] = np.nan
    f.close()
    # write to summary
    summary['wearTime-numNonWearEpisodes(>1hr)'] = int(len(nonWearEpisodes))

    # calculate wear statistics
    wearSamples = e['enmoTrunc'].count()
    nonWearSamples = int(e['enmoTrunc'].isna().sum())
    wearTimeMin = wearSamples * epochPeriod / 60.0
    nonWearTimeMin = nonWearSamples * epochPeriod / 60.0
    # write to summary
    summary['wearTime-overall(days)'] = accUtils.formatNum(
        wearTimeMin / 1440.0, 2)
    summary['nonWearTime-overall(days)'] = accUtils.formatNum(
        nonWearTimeMin / 1440.0, 2)

    # get wear time in each of 24 hours across week
    epochsInMin = 60.0 / epochPeriod
    for i, day in zip(range(0, 7), accUtils.DAYS):
        dayWear = e['enmoTrunc'][e.index.weekday == i].count() / epochsInMin
        # write to summary
        summary['wearTime-' + day + '(hrs)'] = accUtils.formatNum(
            dayWear / 60.0, 2)
    for i in range(0, 24):
        hourWear = e['enmoTrunc'][e.index.hour == i].count() / epochsInMin
        # write to summary
        summary['wearTime-hourOfDay' + str(i) + '-(hrs)'] = \
            accUtils.formatNum(hourWear/60.0, 2)
    summary['wearTime-diurnalHrs'] = accUtils.formatNum(
        e['enmoTrunc'].groupby(e.index.hour).mean().count(), 2)
    summary['wearTime-diurnalMins'] = accUtils.formatNum(
        e['enmoTrunc'].groupby([e.index.hour, e.index.minute]).mean().count(), 2)

    # write binary decision on whether wear time was good or not
    minDiurnalHrs = 24
    minWearDays = 3
    summary['quality-goodWearTime'] = 1
    if summary['wearTime-diurnalHrs'] < minDiurnalHrs or \
            summary['wearTime-overall(days)'] < minWearDays:
        summary['quality-goodWearTime'] = 0
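
# Sketch on one synthetic day of 30s epochs: a stationary hour (axis std below
# the 13mg threshold) becomes a nonwear episode, is nulled in the frame, and is
# logged to 'nonwear-demo.csv.gz' (an arbitrary output name for this example).
idx = pd.date_range('2020-01-01', periods=2880, freq='30s')
eDemo = pd.DataFrame({'enmoTrunc': 0.05,
                      'xStd': 0.05, 'yStd': 0.05, 'zStd': 0.05}, index=idx)
eDemo.loc[eDemo.index.hour == 3, ['xStd', 'yStd', 'zStd']] = 0.001  # still hour
summaryDemo = {}
get_wear_time_stats(eDemo, epochPeriod=30, maxStd=13, minDuration=45,
                    nonWearFile='nonwear-demo.csv.gz', summary=summaryDemo)
print(summaryDemo['wearTime-numNonWearEpisodes(>1hr)'])  # 1
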