Exemple #1
0
def test_load_data():
    # Load/format data
    src = os.path.abspath(__file__ + '/../../')+'/demo/demo_data.csv'
    raw_data = pd.read_csv(src, skiprows=99, names=['timestamps', 'x', 'y', 'z'], usecols=[0, 1, 2, 3])
    raw_data['unix_timestamps'] = pd.to_datetime(raw_data.timestamps, format="%Y-%m-%d %H:%M:%S:%f").values.astype(np.int64) // 10**6
    raw_data = raw_data.iloc[:10,:]

    # Create an instance of GaitPy
    gaitpy = Gaitpy(raw_data,
                    50,
                    v_acc_col_name='y',
                    ts_col_name='unix_timestamps',
                    v_acc_units='g',
                    ts_units='ms',
                    flip=False)

    # Run function being tested
    obtained_y_accel, obtained_ts = util._load_data(gaitpy, gaitpy.down_sample)

    # Confirm expected results
    expected_y_accel = pd.Series(np.array([7.138261,7.177487,7.177487,7.215733,6.177209,7.868856,7.676646,5.792788,4.831736,10.713765]), name='y')
    pd.testing.assert_series_equal(obtained_y_accel, expected_y_accel)

    expected_ts = pd.Series(np.array([1565087150000,1565087150020,1565087150040,1565087150060,1565087150080,
                                      1565087150100,1565087150120,1565087150140,1565087150160,1565087150180]), name='unix_timestamps')
    pd.testing.assert_series_equal(obtained_ts, expected_ts)
Exemple #2
0
    def extract_features(self,
                         subject_height,
                         subject_height_units='centimeter',
                         sensor_height_ratio=0.53,
                         result_file=None,
                         classified_gait=None,
                         ic_prom=5,
                         fc_prom=25):
        ''' Continuous wavelet transform based method of gait feature detection optimization methods

        Parameters
        ----------
        subject_height : int or float
            Height of the subject. Accepts centimeters by default.

        subject_height_units : str
            Units of provided subject height. Centimeters by default.
            - options: 'centimeter', 'inches', 'meter'

        sensor_height_ratio : float
            Height of the sensor relative to subject height. Calculated: sensor height / subject height

        result_file : str
            Optional argument that accepts .csv filepath string to save resulting gait feature dataframe to.
            None by default. (ie. myfolder/myfile.csv)

        classified_gait : str or pandas.core.frame.DataFrame
            Pandas dataframe containing results of gait bout classification procedure (classify_bouts)
            OR
            File path of .h5 file containing results of gait bout classification procedure (classify_bouts)

        ic_prom : int
            Prominance of initial contact peak detection

        fc_prom : int
            Prominance of final contact peak detection

        '''
        import pandas as pd
        import gaitpy.util as util
        import warnings

        print('\tExtracting features...')

        # Load data
        y_accel, timestamps = util._load_data(self, self.down_sample)

        # If classified gait is provided, load pandas dataframe or h5 file
        if classified_gait is not None:
            if type(classified_gait) is str:
                gait_predictions = pd.read_hdf(classified_gait)
            elif type(classified_gait) is pd.core.frame.DataFrame:
                gait_predictions = classified_gait
            else:
                print(
                    'Unable to load classified gait: Please make sure the data is in the correct format, aborting...'
                )
                return
            # Isolate gait bouts
            gait_windows = gait_predictions[gait_predictions['prediction'] ==
                                            1]
            if gait_windows.empty:
                print(
                    'The classified_gait data indicates no bouts of gait were detected, aborting...'
                )
                return

            # Concatenate concurrent bouts
            gait_bouts = util._concatenate_windows(gait_windows,
                                                   window_length=3)
        else:
            # if classified_gait is not provided, assume entire timeseries is 1 bout of gait
            start_time = timestamps[0].astype('datetime64[ms]')
            end_time = timestamps.iloc[-1].astype('datetime64[ms]')
            gait_bouts = pd.DataFrame(
                data={
                    'start_time': [start_time],
                    'end_time': [end_time],
                    'bout_length': [(end_time -
                                     start_time).item().total_seconds()]
                })

        all_bout_gait_features = pd.DataFrame()
        bout_n = 1
        # Loop through gait bouts
        for row_n, bout in gait_bouts.iterrows():
            bout_indices = (
                timestamps.astype('datetime64[ms]') >= bout.start_time) & (
                    timestamps.astype('datetime64[ms]') <= bout.end_time)
            bout_data = pd.DataFrame([])
            bout_data['y'] = pd.DataFrame(
                y_accel.loc[bout_indices].reset_index(drop=True))
            bout_data['ts'] = timestamps.loc[bout_indices].reset_index(
                drop=True)
            if len(bout_data.y) < 15:
                warnings.warn('There are too few data points between ' +
                              str(bout.start_time) + ' and ' +
                              str(bout.end_time) + ', skipping bout...')
                continue

            # Run CWT Gait Model IC and FC detection
            ic_peaks, fc_peaks = util._cwt(bout_data.y, self.down_sample,
                                           ic_prom, fc_prom)

            # Run gait cycle optimization procedure
            pd.options.mode.chained_assignment = None
            optimized_gait = util._optimization(bout_data['ts'], ic_peaks,
                                                fc_peaks)
            if optimized_gait.empty or 1 not in list(
                    optimized_gait.Gait_Cycle):
                continue

            # Calculate changes in height of the center of mass
            optimized_gait = util._height_change_com(optimized_gait,
                                                     bout_data['ts'],
                                                     bout_data['y'],
                                                     self.down_sample)

            # Calculate gait features
            sensor_height = util._calculate_sensor_height(
                subject_height, subject_height_units, sensor_height_ratio)
            gait_features = util._cwt_feature_extraction(
                optimized_gait, sensor_height)

            # remove center of mass height and gait cycle boolean columns, remove rows with NAs
            gait_features.dropna(inplace=True)
            gait_features.drop(['CoM_height', 'Gait_Cycle', 'FC_opp_foot'],
                               axis=1,
                               inplace=True)

            gait_features.insert(0, 'bout_number', bout_n)
            gait_features.insert(1, 'bout_length_sec', bout.bout_length)
            gait_features.insert(2, 'bout_start_time', bout.start_time)
            gait_features.insert(5, 'gait_cycles', len(gait_features))
            all_bout_gait_features = all_bout_gait_features.append(
                gait_features)

            bout_n += 1
        all_bout_gait_features.reset_index(drop=True, inplace=True)
        all_bout_gait_features.iloc[:,
                                    7:] = all_bout_gait_features.iloc[:,
                                                                      7:].round(
                                                                          3)

        # Save results
        if result_file:
            try:
                if not result_file.endswith('.csv'):
                    result_file += '.csv'
                all_bout_gait_features.to_csv(result_file,
                                              index=False,
                                              float_format='%.3f')
            except:
                print(
                    'Unable to save data: Please make sure your results directory exists, aborting...'
                )
                return

        if all_bout_gait_features.empty:
            print(
                '\tFeature extraction complete. No gait cycles detected...\n')
        else:
            print('\tFeature extraction complete!\n')

        return all_bout_gait_features
Exemple #3
0
    def classify_bouts(self, result_file=None):
        """ Gait bout classification using acceleration data in the vertical direction from the lumbar location.

        Parameters
        ----------
        result_file : str
            Optional argument that accepts .h5 filepath string to save resulting predictions to.
            None by default. (ie. myfolder/myfile.h5)

        """
        import pickle
        import pandas as pd
        import os
        import deepdish as dd
        import gaitpy.util as util

        print('\tClassifying bouts of gait...')

        # Load model and feature order
        model_filename = os.path.dirname(
            os.path.realpath(__file__)) + '/model/model.pkl'
        features_filename = os.path.dirname(
            os.path.realpath(__file__)) + '/model/feature_order.txt'
        model = pickle.load(open(model_filename, 'rb'))
        feature_order = open(features_filename, 'r').read().splitlines()
        model_sample_rate = 50.

        # Load data and convert to g
        raw_y_accel, ts = util._load_data(self, self.down_sample)
        y_accel = raw_y_accel / 9.80665

        # Resample data if necessary
        if self.down_sample > model_sample_rate:
            data, timestamps = util._resample_data(
                y_accel, ts,
                str(1000. / model_sample_rate) + 'ms')
        elif self.down_sample == model_sample_rate:
            data = pd.DataFrame({'y': y_accel})
            timestamps = pd.DatetimeIndex(ts.astype('datetime64[ms]'))
        elif self.down_sample < model_sample_rate:
            print(
                'Data sample rate too low for bout detection model. Minimum sample rate required: '
                + str(model_sample_rate) + ' hz, aborting...')
            return

        # Extract signal features from vertical acceleration data
        feature_set, start_times_list, end_times_list = util._extract_signal_features(
            data, timestamps, model_sample_rate)
        feature_set = feature_set[feature_order]

        # Predict
        try:
            pred = model.predict(feature_set)
            predictions_df = pd.DataFrame({
                'prediction': pred,
                'window_start_time': start_times_list,
                'window_end_time': end_times_list
            })
        except:
            print(
                'Unable to make predictions from signal features, aborting...')
            return

        # Save predictions to hdf file
        if result_file:
            try:
                if not result_file.endswith('.h5'):
                    result_file += '.h5'
                predictions_dict = {}
                predictions_dict['predictions'] = predictions_df

                dd.io.save(result_file, predictions_dict)
            except:
                print(
                    'Unable to save data: Please make sure your results directory exists, aborting...'
                )
                return

        print('\tBout classification complete!\n')

        return predictions_df
Exemple #4
0
    def plot_contacts(self, gait_features, result_file=None, show_plot=True):
        """ Plot initial and final contacts of lumbar based gait feature extraction

        Parameters
        ----------
        gait_features : pandas.DataFrame or str
            Pandas dataframe containing results of extract_features function
            OR
            File path of .csv file containing results of extract_features function

        result_file : str
            Optional argument that accepts .html filepath string to save resulting gait event plot to.
            None by default. (ie. myfolder/myfile.html)

        show_plot : bool
            Optional boolean argument that specifies whether your plot is displayed. True by default.

        """
        from bokeh.plotting import figure, output_file, save, show
        from bokeh.models import Legend, Span
        import pandas as pd
        import gaitpy.util as util
        import numpy as np

        print('\tPlotting contacts...')

        # Load data
        y_accel, timestamps = util._load_data(self, self.down_sample)
        ts = pd.to_datetime(timestamps, unit='ms')

        # Load gait_features
        try:
            if type(gait_features) is str:
                icfc = pd.read_csv(gait_features)
            elif type(gait_features) is pd.core.frame.DataFrame:
                icfc = gait_features
            else:
                print(
                    'Unable to load gait features: Please make sure the gait_features is in the correct format, aborting...'
                )
                return
        except:
            print(
                'Unable to load gait features: Please make sure you have provided the correct filepath or dataframe, aborting...'
            )
            return

        if icfc.empty:
            print('\tGait feature dataframe is empty, aborting...')
            return

        p = figure(plot_width=1200,
                   plot_height=600,
                   x_axis_label='Time',
                   y_axis_label='m/s^2',
                   toolbar_location='above',
                   x_axis_type='datetime')
        # Plot vertical axis
        p1 = p.line(ts, y_accel, line_width=2, line_color='blue')

        # isolate ICs, FCs, and bout start/end times
        minima_time = []
        minima_signal = []
        maxima_time = []
        maxima_signal = []
        bout_starts = []
        bout_ends = []
        ics = pd.to_datetime(icfc.IC, unit='ms')
        fcs = pd.to_datetime(icfc.FC, unit='ms')
        icfc.bout_start_time = icfc.bout_start_time.astype(
            np.int64).values // 10**6
        bouts = icfc[['bout_number', 'bout_length_sec',
                      'bout_start_time']].drop_duplicates()
        for ic in ics:
            minima_time.append(ic)
            minima_signal.append(float(y_accel[ts.index[ts == ic]]))
        for fc in fcs:
            maxima_time.append(fc)
            maxima_signal.append(float(y_accel[ts.index[ts == fc]]))
        for row, bout in bouts.iterrows():
            bout_starts.append(bout.bout_start_time)
            bout_ends.append(bout.bout_start_time +
                             (bout.bout_length_sec * 1000))

        # add IC and FCs to plot
        p2 = p.circle(minima_time,
                      minima_signal,
                      size=15,
                      color="green",
                      alpha=0.5)
        p3 = p.circle(maxima_time,
                      maxima_signal,
                      size=15,
                      color="darkorange",
                      alpha=0.5)

        # add bout start and end times to plot
        for bout_start in bout_starts:
            start_bout_line = Span(location=bout_start,
                                   dimension='height',
                                   line_color='green',
                                   line_dash='solid',
                                   line_width=1.5)
            p.add_layout(start_bout_line)
        for bout_end in bout_ends:
            end_bout_line = Span(location=bout_end,
                                 dimension='height',
                                 line_color='red',
                                 line_dash='solid',
                                 line_width=1.5)
            p.add_layout(end_bout_line)

        # add legend
        legend = Legend(items=[("Acceleration", [p1]),
                               ("Initial contact", [p2]),
                               ("Final contact", [p3])],
                        location=(10, 300))

        # format plot
        p.add_layout(legend, 'right')
        p.xaxis.axis_label_text_font_size = "16pt"
        p.yaxis.axis_label_text_font_size = "16pt"
        p.axis.major_label_text_font_size = '16pt'
        p.title.align = 'center'
        p.title.text_font_size = '16pt'
        p.xaxis.axis_label_text_font_style = 'normal'
        p.yaxis.axis_label_text_font_style = 'normal'
        p.xaxis.axis_label_standoff = 5
        p.yaxis.axis_label_standoff = 20
        p.legend.label_text_font = "arial"
        p.legend.label_text_font_size = '16pt'
        p.legend.glyph_height = 30

        if show_plot:
            show(p)

        # save plot
        if result_file:
            try:
                if not result_file.endswith('.html'):
                    result_file += '.html'
                output_file(result_file)
                save(p)
            except:
                print(
                    'Unable to save data: Please make sure your results directory exists, aborting...'
                )
                return

        print('\tPlot complete!\n')