def storeCalibrationParams(summary, xOff, yOff, zOff, xSlope, ySlope, zSlope,
                           xTemp, yTemp, zTemp, meanTemp):
    """Store calibration parameters to output summary dictionary

    :param dict summary: Output dictionary containing all summary metrics
    :param float xOff: x intercept
    :param float yOff: y intercept
    :param float zOff: z intercept
    :param float xSlope: x slope
    :param float ySlope: y slope
    :param float zSlope: z slope
    :param float xTemp: x temperature
    :param float yTemp: y temperature
    :param float zTemp: z temperature
    :param float meanTemp: Calibration mean temperature in file

    :return: Calibration summary values written to dict <summary>
    :rtype: void
    """

    # store output to summary dictionary
    summary['calibration-xOffset(g)'] = accUtils.formatNum(xOff, 4)
    summary['calibration-yOffset(g)'] = accUtils.formatNum(yOff, 4)
    summary['calibration-zOffset(g)'] = accUtils.formatNum(zOff, 4)
    summary['calibration-xSlope(g)'] = accUtils.formatNum(xSlope, 4)
    summary['calibration-ySlope(g)'] = accUtils.formatNum(ySlope, 4)
    summary['calibration-zSlope(g)'] = accUtils.formatNum(zSlope, 4)
    summary['calibration-xTemp(C)'] = accUtils.formatNum(xTemp, 4)
    summary['calibration-yTemp(C)'] = accUtils.formatNum(yTemp, 4)
    summary['calibration-zTemp(C)'] = accUtils.formatNum(zTemp, 4)
    summary['calibration-meanDeviceTemp(C)'] = accUtils.formatNum(meanTemp, 2)
def calculateECDF(e, inputCol, summary):
    """Calculate activity intensity empirical cumulative distribution

    The input data must not be imputed, as ECDF requires different imputation
    where nan/non-wear data segments are IMPUTED FOR EACH INTENSITY LEVEL.
    Here, the average of similar time-of-day values is imputed with one minute
    granularity on different days of the measurement. The following intensity
    levels are calculated:
    1mg bins from 1-20mg
    5mg bins from 25-100mg
    25mg bins from 125-500mg
    100mg bins from 500-2000mg

    :param pandas.DataFrame e: Pandas dataframe of epoch data
    :param str inputCol: Column to calculate intensity distribution on
    :param dict summary: Output dictionary containing all summary metrics

    :return: Write dict <summary> keys '<inputCol>-ecdf-<level...>mg'
    :rtype: void
    """

    ecdf1, step = np.linspace(1, 20, 20, retstep=True)  # 1mg bins from 1-20mg
    ecdf2, step = np.linspace(25, 100, 16, retstep=True)  # 5mg bins from 25-100mg
    ecdf3, step = np.linspace(125, 500, 16, retstep=True)  # 25mg bins from 125-500mg
    ecdf4, step = np.linspace(600, 2000, 15, retstep=True)  # 100mg bins from 500-2000mg
    ecdfXVals = np.concatenate([ecdf1, ecdf2, ecdf3, ecdf4])

    # remove NaNs (necessary for statsmodels.api)
    ecdfData = e[['hour', 'minute', inputCol]][~np.isnan(e[inputCol])]
    if len(ecdfData) > 0:
        # set column names for actual, imputed, and adjusted intensity dist. vals
        cols = []
        colsImputed = []
        colsAdjusted = []
        for xVal in ecdfXVals:
            col = 'ecdf' + str(xVal)
            cols.append(col)
            colsImputed.append(col + 'Imputed')
            colsAdjusted.append(col + 'Adjusted')
            ecdfData[col] = (ecdfData[inputCol] <= xVal) * 1.0
        # calculate imputation values to replace nan metric values
        wearTimeWeights = ecdfData.groupby(['hour', 'minute'])[cols].mean()
        ecdfData = ecdfData.join(wearTimeWeights, on=['hour', 'minute'],
                                 rsuffix='Imputed')
        # for each ecdf xVal column, apply missing data imputation
        for col, imputed, adjusted in zip(cols, colsImputed, colsAdjusted):
            ecdfData[adjusted] = ecdfData[col].fillna(ecdfData[imputed])
        accEcdf = ecdfData[colsAdjusted].mean()
    else:
        accEcdf = pd.Series(data=[0.0 for i in ecdfXVals],
                            index=[str(i) + 'Adjusted' for i in ecdfXVals])
    # and write to summary dict
    for x, ecdf in zip(ecdfXVals, accEcdf):
        summary[inputCol + '-ecdf-' + str(accUtils.formatNum(x, 0)) + 'mg'] = \
            accUtils.formatNum(ecdf, 4)
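# A minimal usage sketch (hypothetical, not part of the pipeline): build a tiny
# epoch dataframe with the 'hour'/'minute' helper columns that calculateECDF
# expects, then inspect the summary keys it writes. Assumes numpy/pandas are
# imported as np/pd at module level, as elsewhere in this file.
def _demoCalculateECDF():
    rng = pd.date_range('2020-01-01', periods=2880, freq='30s')  # one day
    e = pd.DataFrame({'enmoTrunc': np.abs(np.random.randn(len(rng))) * 50},
                     index=rng)
    e['hour'] = e.index.hour
    e['minute'] = e.index.minute
    summary = {}
    calculateECDF(e, 'enmoTrunc', summary)
    print(sorted(summary)[:3])  # a few of the 'enmoTrunc-ecdf-<level>mg' keys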
def get_interrupts(e, epochPeriod, summary):
    """Identify if there are interrupts in the data recording

    :param pandas.DataFrame e: Pandas dataframe of epoch data
    :param int epochPeriod: Size of epoch time window (in seconds)
    :param dict summary: Output dictionary containing all summary metrics

    :return: Write dict <summary> keys 'errs-interrupts-num' &
        'errs-interrupt-mins'
    :rtype: void
    """

    e.dropna(subset=['enmoTrunc', 'xStd', 'yStd', 'zStd'], how='all',
             inplace=True)
    epochNs = epochPeriod * np.timedelta64(1, 's')
    interrupts = np.where(np.diff(np.array(e.index)) > epochNs)[0]
    # get duration of each interrupt in minutes
    interruptMins = []
    for i in interrupts:
        interruptMins.append(
            np.diff(np.array(e[i:i + 2].index)) / np.timedelta64(1, 'm'))
    # record to output summary
    summary['errs-interrupts-num'] = len(interruptMins)
    summary['errs-interrupt-mins'] = accUtils.formatNum(
        np.sum(interruptMins), 1)
def get_interrupts(e, epochPeriod, summary):
    """Identify if there are interrupts in the data recording

    :param pandas.DataFrame e: Pandas dataframe of epoch data
    :param int epochPeriod: Size of epoch time window (in seconds)
    :param dict summary: Output dictionary containing all summary metrics

    :return: Write dict <summary> keys 'errs-interrupts-num' &
        'errs-interrupt-mins'
    :rtype: void

    :return: Epoch dataframe with each interrupt padded by empty (NaN) rows
    :rtype: pandas.DataFrame
    """

    epochNs = epochPeriod * np.timedelta64(1, 's')
    # to_series() keeps the timezone by default in current pandas
    # (the old keep_tz kwarg was removed)
    interrupts = np.where(e.index.to_series().diff() > epochNs)[0]

    # Get duration of each interrupt in minutes
    # (use the scalar gap directly; diff() on a 2-element series would
    # introduce a leading NaT and poison np.sum)
    interruptMins = []
    for i in interrupts:
        interruptMins.append(
            (e.index[i] - e.index[i - 1]) / np.timedelta64(1, 'm'))

    # Record to output summary
    summary['errs-interrupts-num'] = len(interruptMins)
    summary['errs-interrupt-mins'] = accUtils.formatNum(
        np.sum(interruptMins), 1)

    # Pad each interrupt with empty (NaN) epochs so the index is regular again
    frames = [e]
    for i in interrupts:
        start, end = e.index[i - 1:i + 1]
        dti = pd.date_range(start=start, end=end,
                            freq=str(epochPeriod) + 's')[1:-1]
        frames.append(dti.to_frame().drop(columns=0))
    e = pd.concat(frames).sort_index()
    return e
def get_interrupts(e, epochPeriod, summary):
    """Identify if there are interrupts in the data recording

    :param pandas.DataFrame e: Pandas dataframe of epoch data
    :param int epochPeriod: Size of epoch time window (in seconds)
    :param dict summary: Output dictionary containing all summary metrics

    :return: Write dict <summary> keys 'errs-interrupts-num' &
        'errs-interrupt-mins'
    :rtype: void

    :return: Epoch dataframe with each interrupt padded by empty (NaN) rows
    :rtype: pandas.DataFrame
    """

    epochNs = epochPeriod * np.timedelta64(1, 's')
    interrupts = np.where(np.diff(np.array(e.index)) > epochNs)[0]
    # get duration of each interrupt in minutes
    interruptMins = []
    for i in interrupts:
        interruptMins.append(
            np.diff(np.array(e[i:i + 2].index)) / np.timedelta64(1, 'm'))
    # record to output summary
    summary['errs-interrupts-num'] = len(interruptMins)
    summary['errs-interrupt-mins'] = accUtils.formatNum(
        np.sum(interruptMins), 1)
    # pad each interrupt with empty epochs so the index is regular again
    # (DataFrame.append was removed in pandas 2.0, so collect and concat;
    # drop column 0 so only empty rows, not timestamp values, are added)
    frames = [e]
    for i in interrupts:
        start, end = e[i:i + 2].index
        dti = pd.date_range(start=start, end=end,
                            freq=str(epochPeriod) + 's')[1:-1]
        frames.append(dti.to_frame().drop(columns=0))
    e = pd.concat(frames).sort_index()
    return e
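# Hypothetical sketch of the gap-filling variant above (the last definition of
# get_interrupts in this file, so the one in effect at runtime): a 30s-epoch
# index with a two-minute hole should be reported as one interrupt and padded
# back to a regular grid. Assumes np/pd module-level imports as elsewhere here.
def _demoGetInterrupts():
    idx = pd.date_range('2020-01-01 00:00', periods=10, freq='30s')
    idx = idx.delete([4, 5, 6, 7])  # carve a 2-minute gap out of the recording
    e = pd.DataFrame({'enmoTrunc': 1.0}, index=idx)
    summary = {}
    e = get_interrupts(e, 30, summary)
    print(summary['errs-interrupts-num'])  # 1
    print(len(e))  # back to 10 rows after padding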
def writeMovementSummaries(e, labels, summary, useRecommendedImputation):
    """Write overall summary stats for each activity type to summary dict

    :param pandas.DataFrame e: Pandas dataframe of epoch data
    :param list(str) labels: Activity state labels
    :param dict summary: Output dictionary containing all summary metrics
    :param bool useRecommendedImputation: Highly recommended method to impute
        missing data using data from other days around the same time

    :return: Write dict <summary> keys for each activity type 'overall-<avg/sd>',
        'week<day/end>-avg', '<day..>-avg', 'hourOfDay-<hr..>-avg',
        'hourOfWeek<day/end>-<hr..>-avg'
    :rtype: void
    """

    # Identify activity types to summarise
    activityTypes = ['acc', 'CutPointMVPA', 'CutPointVPA']
    activityTypes += labels
    if 'MET' in e.columns:
        activityTypes.append('MET')

    # Summarise each type by: overall, week day/end, day, and hour of day
    for accType in activityTypes:
        col = accType
        if useRecommendedImputation:
            col += 'Imputed'
        if accType in ['CutPointMVPA', 'CutPointVPA']:
            col = accType

        # Overall / weekday / weekend summaries
        summary[accType + '-overall-avg'] = accUtils.formatNum(
            e[col].mean(), 5)
        summary[accType + '-overall-sd'] = accUtils.formatNum(e[col].std(), 2)
        summary[accType + '-weekday-avg'] = accUtils.formatNum(
            e[col][e.index.weekday <= 4].mean(), 2)
        summary[accType + '-weekend-avg'] = accUtils.formatNum(
            e[col][e.index.weekday >= 5].mean(), 2)

        # Daily summary
        for i, day in zip(range(0, 7), accUtils.DAYS):
            summary[accType + '-' + day + '-avg'] = accUtils.formatNum(
                e[col][e.index.weekday == i].mean(), 2)

        # Hourly summaries
        for i in range(0, 24):
            hourOfDay = accUtils.formatNum(e[col][e.index.hour == i].mean(), 2)
            hourOfWeekday = accUtils.formatNum(
                e[col][(e.index.weekday <= 4) & (e.index.hour == i)].mean(), 2)
            hourOfWeekend = accUtils.formatNum(
                e[col][(e.index.weekday >= 5) & (e.index.hour == i)].mean(), 2)
            # Write derived hourly values to summary dictionary
            summary[accType + '-hourOfDay-' + str(i) + '-avg'] = hourOfDay
            summary[accType + '-hourOfWeekday-' + str(i) + '-avg'] = hourOfWeekday
            summary[accType + '-hourOfWeekend-' + str(i) + '-avg'] = hourOfWeekend
def storeCalibrationInformation(summary, bestIntercept, bestSlope, bestTemp,
                                meanTemp, initError, bestError, xMin, xMax,
                                yMin, yMax, zMin, zMax, nStatic,
                                calibrationSphereCriteria=0.3):
    """Store calibration information to output summary dictionary

    :param dict summary: Output dictionary containing all summary metrics
    :param list(float) bestIntercept: Best x/y/z intercept values
    :param list(float) bestSlope: Best x/y/z slope values
    :param list(float) bestTemp: Best x/y/z temperature values
    :param float meanTemp: Calibration mean temperature in file
    :param float initError: Root mean square error (in mg) before calibration
    :param float bestError: Root mean square error (in mg) after calibration
    :param float xMin: xMin information on spread of stationary points
    :param float xMax: xMax information on spread of stationary points
    :param float yMin: yMin information on spread of stationary points
    :param float yMax: yMax information on spread of stationary points
    :param float zMin: zMin information on spread of stationary points
    :param float zMax: zMax information on spread of stationary points
    :param int nStatic: number of stationary points used for calibration
    :param float calibrationSphereCriteria: Threshold to check how well file
        was calibrated

    :return: Calibration summary values written to dict <summary>
    :rtype: void
    """

    # store output to summary dictionary
    summary['calibration-errsBefore(mg)'] = accUtils.formatNum(
        initError * 1000, 2)
    summary['calibration-errsAfter(mg)'] = accUtils.formatNum(
        bestError * 1000, 2)
    storeCalibrationParams(summary, bestIntercept, bestSlope, bestTemp,
                           meanTemp)
    summary['calibration-numStaticPoints'] = nStatic
    summary['calibration-staticXmin(g)'] = accUtils.formatNum(xMin, 2)
    summary['calibration-staticXmax(g)'] = accUtils.formatNum(xMax, 2)
    summary['calibration-staticYmin(g)'] = accUtils.formatNum(yMin, 2)
    summary['calibration-staticYmax(g)'] = accUtils.formatNum(yMax, 2)
    summary['calibration-staticZmin(g)'] = accUtils.formatNum(zMin, 2)
    summary['calibration-staticZmax(g)'] = accUtils.formatNum(zMax, 2)

    # check how well calibrated file was
    summary['quality-calibratedOnOwnData'] = 1
    summary['quality-goodCalibration'] = 1
    s = calibrationSphereCriteria
    try:
        if xMin > -s or xMax < s or yMin > -s or yMax < s or \
                zMin > -s or zMax < s or \
                np.isnan(xMin) or np.isnan(yMin) or np.isnan(zMin):
            summary['quality-goodCalibration'] = 0
    except UnboundLocalError:
        summary['quality-goodCalibration'] = 0
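# Hypothetical sketch: record a calibration whose stationary points span the
# +/-0.3g sphere on every axis, so 'quality-goodCalibration' stays 1. All
# values are made up for illustration only.
def _demoStoreCalibrationInformation():
    summary = {}
    storeCalibrationInformation(
        summary, bestIntercept=[0.01, -0.02, 0.03], bestSlope=[1.0, 1.0, 1.0],
        bestTemp=[0.0, 0.0, 0.0], meanTemp=24.5, initError=0.08,
        bestError=0.01, xMin=-0.9, xMax=0.9, yMin=-0.8, yMax=0.8,
        zMin=-0.7, zMax=0.95, nStatic=120)
    print(summary['quality-goodCalibration'])  # 1
    print(summary['calibration-errsAfter(mg)'])  # 10.0 (0.01g * 1000)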
def writeMovementSummaries(e, labels, summary):
    """Write overall summary stats for each activity type to summary dict

    :param pandas.DataFrame e: Pandas dataframe of epoch data
    :param list(str) labels: Activity state labels
    :param dict summary: Output dictionary containing all summary metrics

    :return: Write dict <summary> keys for each activity type 'overall-<avg/sd>',
        'week<day/end>-avg', '<day..>-avg', 'hourOfDay-<hr..>-avg',
        'hourOfWeek<day/end>-<hr..>-avg'
    :rtype: void
    """

    # identify activity types to summarise
    activityTypes = ['acc', 'MVPA', 'VPA']
    activityTypes += labels
    if 'MET' in e.columns:
        activityTypes.append('MET')

    # summarise each type by: overall, week day/end, day, and hour of day
    for accType in activityTypes:
        col = accType + 'Imputed'
        if accType in ['MVPA', 'VPA']:
            col = accType

        # overall / weekday / weekend summaries
        summary[accType + '-overall-avg'] = accUtils.formatNum(
            e[col].mean(), 2)
        summary[accType + '-overall-sd'] = accUtils.formatNum(e[col].std(), 2)
        summary[accType + '-weekday-avg'] = accUtils.formatNum(
            e[col][e.index.weekday <= 4].mean(), 2)
        summary[accType + '-weekend-avg'] = accUtils.formatNum(
            e[col][e.index.weekday >= 5].mean(), 2)

        # daily summary
        for i, day in zip(range(0, 7), accUtils.DAYS):
            summary[accType + '-' + day + '-avg'] = accUtils.formatNum(
                e[col][e.index.weekday == i].mean(), 2)

        # hourly summaries
        for i in range(0, 24):
            hourOfDay = accUtils.formatNum(e[col][e.index.hour == i].mean(), 2)
            hourOfWeekday = accUtils.formatNum(
                e[col][(e.index.weekday <= 4) & (e.index.hour == i)].mean(), 2)
            hourOfWeekend = accUtils.formatNum(
                e[col][(e.index.weekday >= 5) & (e.index.hour == i)].mean(), 2)
            # write derived hourly values to summary dictionary
            summary[accType + '-hourOfDay-' + str(i) + '-avg'] = hourOfDay
            summary[accType + '-hourOfWeekday-' + str(i) + '-avg'] = hourOfWeekday
            summary[accType + '-hourOfWeekend-' + str(i) + '-avg'] = hourOfWeekend
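# Hypothetical usage sketch for the variant defined immediately above (the
# last definition of writeMovementSummaries in this file, so the one in effect
# at runtime). The '<type>Imputed' columns are assumed to have been produced
# upstream; all values here are synthetic.
def _demoWriteMovementSummaries():
    idx = pd.date_range('2020-01-06', periods=7 * 24 * 60, freq='60s')  # 1 week
    e = pd.DataFrame({'accImputed': 30.0, 'MVPA': 0.1, 'VPA': 0.01,
                      'sleepImputed': 0.3}, index=idx)
    summary = {}
    writeMovementSummaries(e, ['sleep'], summary)
    print(summary['acc-overall-avg'], summary['acc-weekend-avg'])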
def storeCalibrationParams(summary, xyzOff, xyzSlope, xyzTemp, meanTemp):
    """Store calibration parameters to output summary dictionary

    :param dict summary: Output dictionary containing all summary metrics
    :param list(float) xyzOff: intercept [x, y, z]
    :param list(float) xyzSlope: slope [x, y, z]
    :param list(float) xyzTemp: temperature [x, y, z]
    :param float meanTemp: Calibration mean temperature in file

    :return: Calibration summary values written to dict <summary>
    :rtype: void
    """

    # store output to summary dictionary
    summary['calibration-xOffset(g)'] = accUtils.formatNum(xyzOff[0], 4)
    summary['calibration-yOffset(g)'] = accUtils.formatNum(xyzOff[1], 4)
    summary['calibration-zOffset(g)'] = accUtils.formatNum(xyzOff[2], 4)
    summary['calibration-xSlope(g)'] = accUtils.formatNum(xyzSlope[0], 4)
    summary['calibration-ySlope(g)'] = accUtils.formatNum(xyzSlope[1], 4)
    summary['calibration-zSlope(g)'] = accUtils.formatNum(xyzSlope[2], 4)
    summary['calibration-xTemp(C)'] = accUtils.formatNum(xyzTemp[0], 4)
    summary['calibration-yTemp(C)'] = accUtils.formatNum(xyzTemp[1], 4)
    summary['calibration-zTemp(C)'] = accUtils.formatNum(xyzTemp[2], 4)
    summary['calibration-meanDeviceTemp(C)'] = accUtils.formatNum(meanTemp, 2)
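# Hypothetical sketch of the list-based variant above (which, being defined
# later, shadows the component-wise version at the top of this file):
def _demoStoreCalibrationParams():
    summary = {}
    storeCalibrationParams(summary, [0.01, -0.02, 0.03],
                           [1.001, 0.998, 1.002], [0.0, 0.0, 0.0], 24.5)
    print(summary['calibration-xOffset(g)'])  # 0.01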
def get_wear_time_stats(e, epochPeriod, maxStd, minDuration, nonWearFile,
                        summary):
    """Calculate nonWear time, write episodes to file, and return wear statistics

    If daylight savings crossover, update times after time-change by +/- 1hr.
    Also, if Autumn crossover time, remove last 1hr chunk before time-change.

    :param pandas.DataFrame e: Pandas dataframe of epoch data
    :param int epochPeriod: Size of epoch time window (in seconds)
    :param int maxStd: Threshold (in mg units) for stationary vs not
    :param int minDuration: Minimum duration of nonwear events (minutes)
    :param str nonWearFile: Output filename for non wear .csv.gz episodes
    :param dict summary: Output dictionary containing all summary metrics

    :return: Write dict <summary> keys 'wearTime-numNonWearEpisodes(>1hr)',
        'wearTime-overall(days)', 'nonWearTime-overall(days)',
        'wearTime-diurnalHrs', 'wearTime-diurnalMins', 'quality-goodWearTime',
        'wearTime-<day...>', and 'wearTime-hourOfDay-<hr...>'
    :rtype: void

    :return: Write .csv.gz non wear episodes file to <nonWearFile>
    :rtype: void
    """

    maxStd = maxStd / 1000.0  # java uses gravity units (not mg)
    e['nw'] = np.where((e['xStd'] < maxStd) & (e['yStd'] < maxStd) &
                       (e['zStd'] < maxStd), 1, 0)
    starts = e.index[(e['nw'] == 1) & (e['nw'].shift(1).fillna(0) == 0)]
    ends = e.index[(e['nw'] == 1) & (e['nw'].shift(-1).fillna(0) == 0)]
    nonWearEpisodes = [(start, end) for start, end in zip(starts, ends)
                       if end > start + np.timedelta64(minDuration, 'm')]

    # set nonWear data to nan and record to nonWearBouts file
    f = gzip.open(nonWearFile, 'wb')
    f.write('start,end,xStdMax,yStdMax,zStdMax\n'.encode())
    timeFormat = '%Y-%m-%d %H:%M:%S'
    for episode in nonWearEpisodes:
        tmp = e[['xStd', 'yStd', 'zStd']][episode[0]:episode[1]]
        nonWearBout = episode[0].strftime(timeFormat) + ','
        nonWearBout += episode[1].strftime(timeFormat) + ','
        nonWearBout += str(tmp['xStd'].mean()) + ','
        nonWearBout += str(tmp['yStd'].mean()) + ','
        nonWearBout += str(tmp['zStd'].mean()) + '\n'
        f.write(nonWearBout.encode())
        # set main dataframe values to nan
        e[episode[0]:episode[1]] = np.nan
    f.close()
    # write to summary
    summary['wearTime-numNonWearEpisodes(>1hr)'] = int(len(nonWearEpisodes))

    # calculate wear statistics
    wearSamples = e['enmoTrunc'].count()
    nonWearSamples = len(e[np.isnan(e['enmoTrunc'])].index.values)
    wearTimeMin = wearSamples * epochPeriod / 60.0
    nonWearTimeMin = nonWearSamples * epochPeriod / 60.0
    # write to summary
    summary['wearTime-overall(days)'] = accUtils.formatNum(
        wearTimeMin / 1440.0, 2)
    summary['nonWearTime-overall(days)'] = accUtils.formatNum(
        nonWearTimeMin / 1440.0, 2)

    # get wear time in each of 24 hours across week
    epochsInMin = 60.0 / epochPeriod
    for i, day in zip(range(0, 7), accUtils.DAYS):
        dayWear = e['enmoTrunc'][e.index.weekday == i].count() / epochsInMin
        # write to summary
        summary['wearTime-' + day + '(hrs)'] = accUtils.formatNum(
            dayWear / 60.0, 2)
    for i in range(0, 24):
        hourWear = e['enmoTrunc'][e.index.hour == i].count() / epochsInMin
        # write to summary
        summary['wearTime-hourOfDay' + str(i) + '-(hrs)'] = \
            accUtils.formatNum(hourWear / 60.0, 2)
    summary['wearTime-diurnalHrs'] = accUtils.formatNum(
        e['enmoTrunc'].groupby(e.index.hour).mean().count(), 2)
    summary['wearTime-diurnalMins'] = accUtils.formatNum(
        e['enmoTrunc'].groupby([e.index.hour, e.index.minute]).mean().count(),
        2)

    # write binary decision on whether weartime was good or not
    minDiurnalHrs = 24
    minWearDays = 3
    summary['quality-goodWearTime'] = 1
    if summary['wearTime-diurnalHrs'] < minDiurnalHrs or \
            summary['wearTime-overall(days)'] < minWearDays:
        summary['quality-goodWearTime'] = 0
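# Hypothetical sketch: flag a stationary stretch as non-wear and check the
# wear-time bookkeeping. The thresholds and the temporary output path are made
# up; assumes np/pd/gzip module-level imports as elsewhere in this file.
def _demoGetWearTimeStats():
    import os
    import tempfile
    idx = pd.date_range('2020-01-01', periods=2880, freq='30s')  # one day
    e = pd.DataFrame({'enmoTrunc': 0.05, 'xStd': 0.02, 'yStd': 0.02,
                      'zStd': 0.02}, index=idx)
    e.loc[idx[:360], ['xStd', 'yStd', 'zStd']] = 0.001  # 3h stationary block
    summary = {}
    nonWearFile = os.path.join(tempfile.gettempdir(), 'nonWearBouts.csv.gz')
    get_wear_time_stats(e, 30, 13, 60, nonWearFile, summary)
    print(summary['wearTime-numNonWearEpisodes(>1hr)'])  # 1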