Example #1
def getActivitySummary(epochFile, nonWearFile, summary,
    activityClassification=True, timeZone='Europe/London',
    startTime=None, endTime=None,
    epochPeriod=30, stationaryStd=13, minNonWearDuration=60,
    mgCutPointMVPA=100, mgCutPointVPA=425,
    activityModel="activityModels/walmsley-nov20.tar",
    intensityDistribution=False, useRecommendedImputation=True,
    psd=False, fourierFrequency=False, fourierWithAcc=False, m10l5=False,
    verbose=False):
    """Calculate overall activity summary from <epochFile> data

    Get overall activity summary from input <epochFile>. This is achieved by:
    1) getting interrupt and data error summary values
    2) checking if data occurs at a daylight savings crossover
    3) calculating wear-time statistics, and writing nonWear episodes to file
    4) predicting activity from features, and adding a label column
    5) calculating imputation values to replace NaN PA metric values
    6) calculating the empirical cumulative distribution function of vector magnitudes
    7) deriving main movement summaries (overall, weekday/weekend, and hour)

    :param str epochFile: Input csv.gz file of processed epoch data
    :param str nonWearFile: Output filename for non wear .csv.gz episodes
    :param dict summary: Output dictionary containing all summary metrics
    :param bool activityClassification: Perform machine learning of activity states
    :param str timeZone: timezone in country/city format to be used for daylight
        savings crossover check
    :param datetime startTime: Remove data before this time in analysis
    :param datetime endTime: Remove data after this time in analysis
    :param int epochPeriod: Size of epoch time window (in seconds)
    :param int stationaryStd: Threshold (in mg units) for classifying an epoch
        as stationary vs. non-stationary
    :param int minNonWearDuration: Minimum duration of nonwear events (minutes)
    :param int mgCutPointMVPA: Milli-gravity threshold for moderate intensity activity
    :param int mgCutPointVPA: Milli-gravity threshold for vigorous intensity activity
    :param str activityModel: Input tar model file which contains random forest
        pickle model, HMM priors/transitions/emissions npy files, and npy file
        of METS for each activity state
    :param bool intensityDistribution: Add intensity outputs to dict <summary>
    :param bool useRecommendedImputation: Highly recommended method to impute
        missing data using data from other days around the same time
    :param bool verbose: Print verbose output

    :return: Pandas dataframe of activity epoch data
    :rtype: pandas.DataFrame

    :return: Activity prediction labels (empty if <activityClassification>==False)
    :rtype: list(str)

    :return: Write .csv.gz non wear episodes file to <nonWearFile>
    :rtype: void

    :return: Movement summary values written to dict <summary>
    :rtype: void

    :Example:
    >>> import summariseEpoch
    >>> summary = {}
    >>> epochData, labels = summariseEpoch.getActivitySummary("epoch.csv.gz",
            "nonWear.csv.gz", summary)
    <nonWear file written to "nonWear.csv.gz" and dict "summary" updated with outcomes>
    """

    accUtils.toScreen("=== Summarizing ===")

    if isinstance(epochFile, pd.DataFrame):
        e = epochFile
    else:
        # Use pandas to read in and store the epoch data
        e = pd.read_csv(
            epochFile, index_col=['time'],
            parse_dates=['time'], date_parser=accUtils.date_parser,
        )

    # Remove data before/after user specified start/end times
    rows = e.shape[0]
    tz = pytz.timezone(timeZone)
    if startTime:
        localStartTime = tz.localize(startTime)
        e = e[e.index >= localStartTime]
    if endTime:
        localEndTime = tz.localize(endTime)
        e = e[e.index <= localEndTime]
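    # Note: tz.localize() attaches the <timeZone> offset to the naive
    # user-supplied datetimes so they compare correctly against the tz-aware
    # epoch index (assuming accUtils.date_parser yields tz-aware timestamps)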
    # Quit if no data left
    if e.shape[0] == 0:
        print("No rows remaining after start/end time removal")
        print("Previously there were %d rows, now shape: %s" % (rows, str(e.shape)))
        sys.exit(-9)

    # Get start & end times
    startTime = e.index[0]
    endTime = e.index[-1]
    summary['file-startTime'] = accUtils.date_strftime(startTime)
    summary['file-endTime'] = accUtils.date_strftime(endTime)
    summary['file-firstDay(0=mon,6=sun)'] = startTime.weekday()

    # Get interrupt and data error summary vals
    e = get_interrupts(e, epochPeriod, summary)

    # Check daylight savings time crossover
    check_daylight_savings_crossovers(e, summary)

    # Calculate wear-time statistics, and write nonWear episodes to file
    get_wear_time_stats(e, epochPeriod, stationaryStd, minNonWearDuration,
        nonWearFile, summary)

    # Calculate and include data quality statistics
    get_total_reads(e, epochPeriod, summary)
    get_clips(e, epochPeriod, summary)

    # Predict activity from features, and add label column
    if activityClassification:
        e, labels = accClassification.activityClassification(e, activityModel)
    else:
        labels = []
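    # labels: activity prediction labels from <activityModel> (empty when
    # classification is disabled); used by the circadian and movement
    # summaries below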

    # enmo: Euclidean Norm Minus One
    # Trunc: negative values truncated to zero (i.e. never negative)
    # enmo = sqrt(x^2 + y^2 + z^2) - 1
    # enmoTrunc = max(enmo, 0)
    e['acc'] = e['enmoTrunc'] * 1000 # convert enmoTrunc to milli-G units

    # Calculate imputation values to replace nan PA metric values
    e = perform_wearTime_imputation(e, verbose)
    e['CutPointMVPA'] = e['accImputed'] >= mgCutPointMVPA
    e['CutPointVPA'] = e['accImputed'] >= mgCutPointVPA
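    # Each True epoch marks epochPeriod seconds spent at or above the
    # cut-point, so, as a rough sketch, total MVPA minutes could be derived
    # as e['CutPointMVPA'].sum() * epochPeriod / 60; the official per-day
    # summaries are computed in writeMovementSummaries() below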

    # Calculate empirical cumulative distribution function of vector magnitudes
    if intensityDistribution:
        calculateECDF(e, 'acc', summary, useRecommendedImputation)

    # Calculate circadian metrics
    if psd:
        circadianRhythms.calculatePSD(e, epochPeriod, fourierWithAcc, labels, summary)
    if fourierFrequency:
        circadianRhythms.calculateFourierFreq(e, epochPeriod, fourierWithAcc, labels, summary)
    if m10l5:
        circadianRhythms.calculateM10L5(e, epochPeriod, summary)
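    # (M10L5 contrasts the most-active 10 consecutive hours with the
    # least-active 5 consecutive hours of the average day; the PSD and
    # Fourier options summarise periodicity in the acceleration signal)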

    # Main movement summaries
    writeMovementSummaries(e, labels, summary, useRecommendedImputation)

    # Return physical activity summary
    return e, labels
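

# A minimal usage sketch for the function above (hypothetical file names;
# assumes "epoch.csv.gz" was produced by the upstream epoch-extraction step
# and that accUtils/accClassification are importable):
def _exampleGetActivitySummary():
    from datetime import datetime

    summary = {}
    epochData, labels = getActivitySummary(
        "epoch.csv.gz", "nonWear.csv.gz", summary,
        startTime=datetime(2021, 5, 3), endTime=datetime(2021, 5, 10),
        intensityDistribution=True)
    # summary is populated in place, e.g. 'file-startTime', 'file-endTime'
    print(summary['file-startTime'], summary['file-endTime'])
    return epochData, labels
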
def getActivitySummary(epochFile,
                       nonWearFile,
                       summary,
                       activityClassification=True,
                       startTime=None,
                       endTime=None,
                       epochPeriod=30,
                       stationaryStd=13,
                       minNonWearDuration=60,
                       mgMVPA=100,
                       mgVPA=425,
                       activityModel="activityModels/doherty2018.tar",
                       intensityDistribution=False,
                       verbose=False):
    """Calculate overall activity summary from <epochFile> data

    Get overall activity summary from input <epochFile>. This is achieved by:
    1) getting interrupt and data error summary values
    2) checking if data occurs at a daylight savings crossover
    3) calculating wear-time statistics, and writing nonWear episodes to file
    4) predicting activity from features, and adding a label column
    5) calculating imputation values to replace NaN PA metric values
    6) calculating the empirical cumulative distribution function of vector magnitudes
    7) deriving main movement summaries (overall, weekday/weekend, and hour)

    :param str epochFile: Input csv.gz file of processed epoch data
    :param str nonWearFile: Output filename for non wear .csv.gz episodes
    :param dict summary: Output dictionary containing all summary metrics
    :param bool activityClassification: Perform machine learning of activity states
    :param datetime startTime: Remove data before this time in analysis
    :param datetime endTime: Remove data after this time in analysis 
    :param int epochPeriod: Size of epoch time window (in seconds)
    :param int stationaryStd: Threshold (in mg units) for classifying an epoch
        as stationary vs. non-stationary
    :param int minNonWearDuration: Minimum duration of nonwear events (minutes)
    :param int mgMVPA: Milli-gravity threshold for moderate intensity activity
    :param int mgVPA: Milli-gravity threshold for vigorous intensity activity
    :param str activityModel: Input tar model file which contains random forest
        pickle model, HMM priors/transitions/emissions npy files, and npy file
        of METS for each activity state
    :param bool intensityDistribution: Add intensity outputs to dict <summary>
    :param bool verbose: Print verbose output
    
    :return: Pandas dataframe of activity epoch data
    :rtype: pandas.DataFrame

    :return: Activity prediction labels (empty if <activityClassification>==False)
    :rtype: list(str)

    :return: Write .csv.gz non wear episodes file to <nonWearFile>
    :rtype: void

    :return: Movement summary values written to dict <summary>
    :rtype: void

    :Example:
    >>> import summariseEpoch
    >>> summary = {}
    >>> epochData, labels = summariseEpoch.getActivitySummary("epoch.csv.gz",
            "nonWear.csv.gz", summary)
    <nonWear file written to "nonWear.csv.gz" and dict "summary" updated with outcomes>
    """

    if isinstance(epochFile, pd.DataFrame):
        e = epochFile
    else:
        # use pandas to read in and store the epoch data
        e = pd.read_csv(epochFile,
                        parse_dates=['time'],
                        index_col=['time'],
                        compression='gzip').sort_index()

    # remove data before/after user specified start/end times
    rows = e.shape[0]
    if startTime:
        e = e[e.index >= startTime]
    if endTime:
        e = e[e.index <= endTime]
    # quit if no data left
    if e.shape[0] == 0:
        print("no rows remaining after start/end time removal")
        print("previously there were %d rows, now shape: %s" %
              (rows, str(e.shape)))
        sys.exit(-9)

    # get start & end times
    startTime = pd.to_datetime(e.index.values[0])
    endTime = pd.to_datetime(e.index.values[-1])
    summary['file-startTime'] = startTime.strftime('%Y-%m-%d %H:%M:%S')
    summary['file-endTime'] = endTime.strftime('%Y-%m-%d %H:%M:%S')
    summary['file-firstDay(0=mon,6=sun)'] = startTime.weekday()

    # get interrupt and data error summary vals
    interruptMins = get_interrupts(e, epochPeriod, summary)

    # check if data occurs at a daylight savings crossover
    e = check_daylight_savings_crossover(e, startTime, endTime, summary)

    # calculate wear-time statistics, and write nonWear episodes to file
    get_wear_time_stats(e, epochPeriod, stationaryStd, minNonWearDuration,
                        nonWearFile, summary)

    # predict activity from features, and add label column
    if activityClassification:
        e, labels = accClassification.activityClassification(e, activityModel)
    else:
        labels = []

    # enmo: Euclidean Norm Minus One
    # Trunc: negative values truncated to zero (i.e. never negative)
    # enmo = sqrt(x^2 + y^2 + z^2) - 1
    # enmoTrunc = max(enmo, 0)
    e['acc'] = e['enmoTrunc'] * 1000  # convert enmoTrunc to milli-G units

    # calculate imputation values to replace nan PA metric values
    e = perform_wearTime_imputation(e, verbose)
    e['MVPA'] = e['accImputed'] >= mgMVPA
    e['VPA'] = e['accImputed'] >= mgVPA

    # calculate empirical cumulative distribution function of vector magnitudes
    if intensityDistribution:
        calculateECDF(e, 'acc', summary)

    # main movement summaries
    writeMovementSummaries(e, labels, summary)

    # return physical activity summary
    return e, labels