def load_data(event, paths, tmp):
    """Return the gait data of one event of a trial, using a file cache.

    The processed result is cached in ``tmp`` under a file name derived
    from the event name. If that file can be opened it is loaded directly.
    Otherwise the raw trial data is cleaned and processed, the result is
    saved to the cache, and then returned.

    Parameters
    ==========
    event : string
        A valid event for the given trial.
    paths : list of strings
        The paths to the mocap, record, and meta data files.
    tmp : string
        A path to a temporary directory in which the processed data can be
        stored.

    Returns
    =======
    gait_data : gaitanalysis.gait.GaitData
        The GaitData instance containing the data for the event.

    """
    # e.g. "Some Event" -> "<tmp>/some_event.h5"
    cache_path = os.path.join(tmp, event.lower().replace(' ', '_') + '.h5')

    # Opening the file is only a probe for the presence of a readable cache.
    try:
        probe = open(cache_path, 'r')
    except IOError:
        probe = None

    if probe is not None:
        print('Loading processed {} data from file...'.format(event))
        probe.close()
        return GaitData(cache_path)

    print('Cleaning and processing {} data...'.format(event))

    # Load the raw data, clean it up, and extract the section for the event.
    raw = DFlowData(*paths)
    raw.clean_data(ignore_hbm=True)
    event_df = raw.extract_processed_data(event=event,
                                          index_col='TimeStamp',
                                          isb_coordinates=True)

    # Compute the lower limb 2D inverse dynamics, identify right heel
    # strike times, and split the data into gait cycles.
    gait_data = GaitData(event_df)
    marker_set = raw.meta['trial']['marker-set']
    subject_mass = raw.meta['subject']['mass']
    labels = markers_for_2D_inverse_dynamics(marker_set)
    gait_data.inverse_dynamics_2d(*(list(labels) + [subject_mass, 6.0]))
    gait_data.grf_landmarks('FP2.ForY', 'FP1.ForY',
                            filter_frequency=10.0, threshold=27.0)
    gait_data.split_at('right', num_samples=80,
                       belt_speed_column='RightBeltSpeed')

    # Make sure the cache directory exists before writing into it.
    if not os.path.exists(tmp):
        os.makedirs(tmp)
    gait_data.save(cache_path)

    return gait_data
def measured_subject_mass(raw_data_dir, processed_data_dir):
    """Compute the mean mass of each subject from the force plate data
    collected during the "Calibration Pose" event and compare it to the
    mass reported by the subject.

    Subjects listed as having invalid measurements are skipped, so the
    self reported mass should be used for them. The extracted event data
    of every trial is cached in ``calibration_pose.h5`` inside
    ``processed_data_dir`` (one key per trial) and reused if present.

    Parameters
    ----------
    raw_data_dir : string
        The path to the raw data directory.
    processed_data_dir : string
        The path to the processed data directory.

    Returns
    -------
    mean : pandas.DataFrame
        A data frame indexed by subject id with the columns
        'Self Reported Mass', 'Mean Measured Mass',
        'Measured Mass Std. Dev.' and 'Diff' (measured minus self
        reported mass).

    """
    # Subject 0 is for the null subject. For subject 1 we use the self
    # reported value because there is no "Calibration Pose" event. For
    # subject 11 and subject 4, we use the self reported mass because the
    # wooden feet were in place and the force measurements are
    # untrust-worthy.
    subj_with_invalid_meas = [0, 1, 4, 11]

    # Some of the trials have anomalies in the data after the calibration
    # pose due to the subjects' movement. The following gives best estimates
    # of the sections of the event that are suitable to use in the subjects'
    # mass computation. The entire time series during the "Calibration Pose"
    # event is acceptable for trials not listed. Each entry is
    # (start, stop): a positive stop is an offset from the first time stamp,
    # a negative start is an offset from the last time stamp.
    time_sections = {'020': (None, 14.0),
                     '021': (None, 14.0),
                     '031': (-14.0, None),
                     '047': (None, 12.0),
                     '048': (None, 7.0),
                     '055': (-12.0, None),
                     '056': (-3.0, None),  # also the first 2 seconds are good
                     '057': (-8.0, None),
                     '063': (None, 6.0),  # also the last 6 seconds are good
                     '069': (None, 14.0),
                     '078': (None, 15.0)}

    # Slicing (instead of indexing) the fourth-from-last character keeps
    # directory paths shorter than four characters from raising IndexError.
    trial_dirs = [x[0] for x in os.walk(raw_data_dir) if x[0][-4:-3] == 'T']
    trial_nums = [x[-3:] for x in trial_dirs if x[-3:] not in ['001', '002']]

    event = 'Calibration Pose'

    tmp_file_name = '_'.join(event.lower().split(' ')) + '.h5'
    tmp_data_path = os.path.join(processed_data_dir, tmp_file_name)

    if not os.path.exists(processed_data_dir):
        os.makedirs(processed_data_dir)

    subject_data = defaultdict(list)

    for trial_number in trial_nums:

        dflow_data = DFlowData(*trial_file_paths(raw_data_dir,
                                                 trial_number))

        subject_id = dflow_data.meta['subject']['id']

        if subject_id in subj_with_invalid_meas:
            continue

        # Format first so the underline matches the printed heading.
        msg = 'Computing Mass for Trial #{}, Subject #{}'.format(
            trial_number, subject_id)
        print(msg)
        print('=' * len(msg))

        hdf_key = 'T' + trial_number

        # read_hdf itself reports a missing file (IOError) or a missing
        # key (KeyError), so no separate file handle has to be opened (and
        # possibly leaked) just to probe for the cache.
        try:
            df = pd.read_hdf(tmp_data_path, key=hdf_key)
        except (IOError, KeyError):
            print('Loading raw data files and cleaning...')
            dflow_data.clean_data(ignore_hbm=True)
            df = dflow_data.extract_processed_data(event=event,
                                                   index_col='TimeStamp',
                                                   isb_coordinates=True)
            df.to_hdf(tmp_data_path, key=hdf_key)
        else:
            print('Loading preprocessed {} data from file...'.format(event))

        # This is the time varying mass during the calibration pose: the
        # vertical force summed over both force plates divided by g.
        df['Mass'] = (df['FP1.ForY'] + df['FP2.ForY']) / 9.81

        # This sets the slice indices so that only the portion of the
        # time series with valid data is used to compute the mass.
        start, stop = time_sections.get(trial_number, (None, None))
        if start is None and stop is not None:
            stop = df.index[0] + stop
        elif start is not None and stop is None:
            start = df.index[-1] + start

        valid = df['Mass'].loc[start:stop]

        actual_mass = valid.mean()
        std = valid.std()

        reported_mass = dflow_data.meta['subject']['mass']

        subject_data['Trial Number'].append(trial_number)
        subject_data['Subject ID'].append(dflow_data.meta['subject']['id'])
        subject_data['Self Reported Mass'].append(reported_mass)
        subject_data['Mean Measured Mass'].append(actual_mass)
        subject_data['Measured Mass Std. Dev.'].append(std)
        subject_data['Gender'].append(dflow_data.meta['subject']['gender'])

        print("Measured mass: {} kg".format(actual_mass))
        print("Self reported mass: {} kg".format(reported_mass))
        print("\n")

    subject_df = pd.DataFrame(subject_data)

    grouped = subject_df.groupby('Subject ID')

    # Only the numeric columns can be averaged; 'Trial Number' and 'Gender'
    # hold strings and make a plain grouped.mean() fail on recent pandas.
    numeric_columns = ['Self Reported Mass',
                       'Mean Measured Mass',
                       'Measured Mass Std. Dev.']
    mean = grouped[numeric_columns].mean()

    mean['Diff'] = mean['Mean Measured Mass'] - mean['Self Reported Mass']

    # Replace the averaged standard deviation with the root-mean-square of
    # the per-trial standard deviations of each subject.

    def uncert(x):
        return np.sqrt(np.sum(x**2) / len(x))

    mean['Measured Mass Std. Dev.'] = \
        grouped['Measured Mass Std. Dev.'].agg(uncert)

    return mean
Exemple #3
0
# Collects the mass, height, age and gender stored in the meta data of a
# fixed set of trials, prints the mean mass and height, and starts a figure.
script_path = os.path.realpath(__file__)
src_dir = os.path.dirname(script_path)

root_dir = os.path.realpath(os.path.join(src_dir, '..'))
# NOTE(review): the root computed above is immediately overridden by this
# hard-coded relative path -- confirm which one is intended.
root_dir = '../perturbed-data-paper/raw-data'
mass = []
height = []
age = []
gender = []
l1 = range(6, 36)
l2 = range(40, 81)

# list() is needed because range objects cannot be concatenated with "+" on
# Python 3; on Python 2 it is a harmless copy.
files = list(l1) + list(l2)
for index in files:
    dflow_data = DFlowData(*load(index, root_dir))
    # Only trials whose meta data has a subject entry with a mass are used.
    if ('subject' in dflow_data.meta and
            'mass' in dflow_data.meta['subject']):
        subject = dflow_data.meta['subject']
        mass.append(subject['mass'])
        height.append(subject['height'])
        age.append(subject['age'])
        gender.append(subject['gender'])

# Parenthesised single-argument form prints the same on Python 2 and 3.
print(np.mean(mass))
print(np.mean(height))

plt.figure(1)

x, y = getPDF(mass)
plt.subplot(221)
def load_data(event, paths, tmp):
    """Return the gait data of one event of a trial, using a file cache.

    The processed result is cached in ``tmp`` under a file name derived
    from the event name. If that file can be opened it is loaded directly.
    Otherwise the raw trial data is cleaned and processed, the result is
    saved to the cache, and then returned.

    Parameters
    ==========
    event : string
        A valid event for the given trial.
    paths : list of strings
        The paths to the mocap, record, and meta data files.
    tmp : string
        A path to a temporary directory in which the processed data can be
        stored.

    Returns
    =======
    gait_data : gaitanalysis.gait.GaitData
        The GaitData instance containing the data for the event.

    """
    # e.g. "Some Event" -> "<tmp>/some_event.h5"
    cache_name = event.lower().replace(' ', '_') + '.h5'
    cache_path = os.path.join(tmp, cache_name)

    # Opening the file is only a probe for the presence of a readable cache.
    try:
        cache_file = open(cache_path, 'r')
    except IOError:
        cache_file = None

    if cache_file is not None:
        print('Loading processed {} data from file...'.format(event))
        cache_file.close()
        return GaitData(cache_path)

    print('Cleaning and processing {} data...'.format(event))

    # Load the raw data, clean it up, and extract the section for the event.
    trial = DFlowData(*paths)
    trial.clean_data(ignore_hbm=True)
    event_df = trial.extract_processed_data(event=event,
                                            index_col='TimeStamp',
                                            isb_coordinates=True)

    # Compute the lower limb 2D inverse dynamics, identify right heel
    # strike times, and split the data into gait cycles.
    gait_data = GaitData(event_df)
    marker_set = trial.meta['trial']['marker-set']
    subject_mass = trial.meta['subject']['mass']
    labels = markers_for_2D_inverse_dynamics(marker_set)
    gait_data.inverse_dynamics_2d(*(list(labels) + [subject_mass, 6.0]))
    gait_data.grf_landmarks('FP2.ForY', 'FP1.ForY',
                            filter_frequency=10.0, threshold=27.0)
    gait_data.split_at('right', num_samples=80,
                       belt_speed_column='RightBeltSpeed')

    # Make sure the cache directory exists before writing into it.
    if not os.path.exists(tmp):
        os.makedirs(tmp)
    gait_data.save(cache_path)

    return gait_data
def measured_subject_mass(raw_data_dir, processed_data_dir):
    """Compute the mean mass of each subject from the force plate data
    collected during the "Calibration Pose" event and compare it to the
    mass reported by the subject.

    Subjects listed as having invalid measurements are skipped, so the
    self reported mass should be used for them. The extracted event data
    of every trial is cached in ``calibration_pose.h5`` inside
    ``processed_data_dir`` (one key per trial) and reused if present.

    Parameters
    ----------
    raw_data_dir : string
        The path to the raw data directory.
    processed_data_dir : string
        The path to the processed data directory.

    Returns
    -------
    mean : pandas.DataFrame
        A data frame indexed by subject id with the columns
        'Self Reported Mass', 'Mean Measured Mass',
        'Measured Mass Std. Dev.' and 'Diff' (measured minus self
        reported mass).

    """
    # Subject 0 is for the null subject. For subject 1 we use the self
    # reported value because there is no "Calibration Pose" event. For
    # subject 11 and subject 4, we use the self reported mass because the
    # wooden feet were in place and the force measurements are
    # untrust-worthy.
    subj_with_invalid_meas = [0, 1, 4, 11]

    # Some of the trials have anomalies in the data after the calibration
    # pose due to the subjects' movement. The following gives best estimates
    # of the sections of the event that are suitable to use in the subjects'
    # mass computation. The entire time series during the "Calibration Pose"
    # event is acceptable for trials not listed. Each entry is
    # (start, stop): a positive stop is an offset from the first time stamp,
    # a negative start is an offset from the last time stamp.
    time_sections = {
        '020': (None, 14.0),
        '021': (None, 14.0),
        '031': (-14.0, None),
        '047': (None, 12.0),
        '048': (None, 7.0),
        '055': (-12.0, None),
        '056': (-3.0, None),  # also the first 2 seconds are good
        '057': (-8.0, None),
        '063': (None, 6.0),  # also the last 6 seconds are good
        '069': (None, 14.0),
        '078': (None, 15.0)
    }

    # Slicing (instead of indexing) the fourth-from-last character keeps
    # directory paths shorter than four characters from raising IndexError.
    trial_dirs = [x[0] for x in os.walk(raw_data_dir) if x[0][-4:-3] == 'T']
    trial_nums = [x[-3:] for x in trial_dirs if x[-3:] not in ['001', '002']]

    event = 'Calibration Pose'

    tmp_file_name = '_'.join(event.lower().split(' ')) + '.h5'
    tmp_data_path = os.path.join(processed_data_dir, tmp_file_name)

    if not os.path.exists(processed_data_dir):
        os.makedirs(processed_data_dir)

    subject_data = defaultdict(list)

    for trial_number in trial_nums:

        dflow_data = DFlowData(*trial_file_paths(raw_data_dir, trial_number))

        subject_id = dflow_data.meta['subject']['id']

        if subject_id in subj_with_invalid_meas:
            continue

        # Format first so the underline matches the printed heading.
        msg = 'Computing Mass for Trial #{}, Subject #{}'.format(
            trial_number, subject_id)
        print(msg)
        print('=' * len(msg))

        hdf_key = 'T' + trial_number

        # read_hdf itself reports a missing file (IOError) or a missing
        # key (KeyError), so no separate file handle has to be opened (and
        # possibly leaked) just to probe for the cache.
        try:
            df = pd.read_hdf(tmp_data_path, key=hdf_key)
        except (IOError, KeyError):
            print('Loading raw data files and cleaning...')
            dflow_data.clean_data(ignore_hbm=True)
            df = dflow_data.extract_processed_data(event=event,
                                                   index_col='TimeStamp',
                                                   isb_coordinates=True)
            df.to_hdf(tmp_data_path, key=hdf_key)
        else:
            print('Loading preprocessed {} data from file...'.format(event))

        # This is the time varying mass during the calibration pose: the
        # vertical force summed over both force plates divided by g.
        df['Mass'] = (df['FP1.ForY'] + df['FP2.ForY']) / 9.81

        # This sets the slice indices so that only the portion of the
        # time series with valid data is used to compute the mass.
        start, stop = time_sections.get(trial_number, (None, None))
        if start is None and stop is not None:
            stop = df.index[0] + stop
        elif start is not None and stop is None:
            start = df.index[-1] + start

        valid = df['Mass'].loc[start:stop]

        actual_mass = valid.mean()
        std = valid.std()

        reported_mass = dflow_data.meta['subject']['mass']

        subject_data['Trial Number'].append(trial_number)
        subject_data['Subject ID'].append(dflow_data.meta['subject']['id'])
        subject_data['Self Reported Mass'].append(reported_mass)
        subject_data['Mean Measured Mass'].append(actual_mass)
        subject_data['Measured Mass Std. Dev.'].append(std)
        subject_data['Gender'].append(dflow_data.meta['subject']['gender'])

        print("Measured mass: {} kg".format(actual_mass))
        print("Self reported mass: {} kg".format(reported_mass))
        print("\n")

    subject_df = pd.DataFrame(subject_data)

    grouped = subject_df.groupby('Subject ID')

    # Only the numeric columns can be averaged; 'Trial Number' and 'Gender'
    # hold strings and make a plain grouped.mean() fail on recent pandas.
    numeric_columns = ['Self Reported Mass',
                       'Mean Measured Mass',
                       'Measured Mass Std. Dev.']
    mean = grouped[numeric_columns].mean()

    mean['Diff'] = mean['Mean Measured Mass'] - mean['Self Reported Mass']

    # Replace the averaged standard deviation with the root-mean-square of
    # the per-trial standard deviations of each subject.

    def uncert(x):
        return np.sqrt(np.sum(x**2) / len(x))

    mean['Measured Mass Std. Dev.'] = \
        grouped['Measured Mass Std. Dev.'].agg(uncert)

    return mean
Exemple #6
0
def DataMoore(trials,
              raw_data_dir='/home/eprietop/Documents/perturbed-data-paper/'
                           'raw-data'):
    """Export inverse dynamics results, gait landmarks and vertical ground
    reaction forces for the given trials and three walking events.

    For each of the events 'First Normal Walking', 'Longitudinal
    Perturbation' and 'Second Normal Walking' a sub-directory of the
    module-level ``directory`` is created if missing. For every trial the
    following CSV files are written into it:

    - ``Cycle_detection <trial>``: the values returned by
      ``grf_landmarks``, with rows R_strike, L_strike, R_off, L_off.
    - ``ID_0<trial>``: the last 31 columns of the event data, rounded to
      four decimals.
    - ``GRF <trial>``: the ``FP2.ForY`` and ``FP1.ForY`` columns.

    Finally a table of trial, mass and height is written to
    ``trial-anthropometries`` inside ``directory``. Its first row is the
    all-zero row used to seed the table.

    Parameters
    ----------
    trials : iterable
        Trial identifiers. Each one is substituted into the ``T0%s``,
        ``mocap-0%s.txt``, ``record-0%s.txt`` and ``meta-0%s.yml`` name
        patterns. It is iterated once per event, so it must be re-iterable
        (e.g. a list).
    raw_data_dir : string, optional
        Directory containing the ``T0<trial>`` raw data directories.

    Notes
    -----
    The working directory is changed while running; on return it is the
    parent directory of ``directory``.

    """
    events = [
        'First Normal Walking', 'Longitudinal Perturbation',
        'Second Normal Walking'
    ]

    # Marker coordinates and force plate signals handed to the 2D inverse
    # dynamics. They do not depend on the trial, so build them once.
    body_parts = ['SHO', 'GTRO', 'LEK', 'LM', 'HEE', 'MT5']
    coordinates = ['.PosX', '.PosY']
    body_parts_l = ['L' + c + e for c in body_parts for e in coordinates]
    body_parts_r = ['R' + c + e for c in body_parts for e in coordinates]
    force_signals = ['.ForX', '.ForY', '.MomZ']
    forces_l = ['FP1' + c for c in force_signals]
    forces_r = ['FP2' + c for c in force_signals]

    # Seed row for np.vstack; it is kept as the first row of the output.
    antropometric = np.array([0, 0, 0])
    # Every trial is visited once per event but must be listed only once.
    # A set of trial ids is used because "row in ndarray" matches on any
    # single equal element and would drop trials sharing a mass or height.
    seen_trials = set()

    # Resolve once so the later chdir calls cannot change what a relative
    # ``directory`` refers to.
    base_dir = os.path.abspath(directory)

    for event in events:
        event_dir = os.path.join(base_dir, event)
        if not os.path.exists(event_dir):
            os.makedirs(event_dir)
        os.chdir(event_dir)

        for trial in trials:
            trial_dir = os.path.join(raw_data_dir, 'T0%s' % trial)
            data = DFlowData(
                os.path.join(trial_dir, 'mocap-0%s.txt' % trial),
                os.path.join(trial_dir, 'record-0%s.txt' % trial),
                os.path.join(trial_dir, 'meta-0%s.yml' % trial))

            # Fall back to NaN when the meta data lacks the subject's mass
            # or height.
            try:
                mass = data.meta['subject']['mass']
                height = data.meta['subject']['height']
            except KeyError:
                mass = np.nan
                height = np.nan

            if trial not in seen_trials:
                seen_trials.add(trial)
                antropometric = np.vstack(
                    (antropometric, [trial, mass, height]))

            data.clean_data()
            event_df = data.extract_processed_data(event=event)
            gait_data = gait.GaitData(event_df)
            gait_data.inverse_dynamics_2d(body_parts_l, body_parts_r,
                                          forces_l, forces_r, mass, 6.0)

            # We had to put a threshold of 20
            cycles = gait_data.grf_landmarks(forces_r[1],
                                             forces_l[1],
                                             threshold=20.0,
                                             do_plot=False)
            cycles_df = pd.DataFrame(
                list(cycles),
                index=['R_strike', 'L_strike', 'R_off', 'L_off'])
            cycles_df.to_csv('Cycle_detection %s' % trial)

            # NOTE(review): presumably the last 31 columns are the ones
            # added by inverse_dynamics_2d -- confirm.
            processed = event_df.iloc[:, -31:].round(4)
            processed.to_csv('ID' + '_0%s' % trial)

            grf_y = pd.concat((event_df[forces_r[1]], event_df[forces_l[1]]),
                              axis=1)
            grf_y.to_csv('GRF %s' % trial)

        os.chdir(base_dir)

    antro = pd.DataFrame(antropometric,
                         columns=['trial', 'mass [kg]', 'height [m]'])
    antro.to_csv('trial-anthropometries')
    # Kept from the original behaviour: leave the caller one level above
    # ``directory``.
    os.chdir(os.path.dirname(base_dir))
    return
# Every backslash is escaped explicitly. The original literal relied on
# unrecognised escapes such as "\T" and "\g" being kept verbatim, which is
# deprecated; the resulting string is unchanged.
processed_data_path = ('D:\\Tian\\Research\\Projects\\ML Project\\'
                       'gait_database_processed\\GaitDatabase\\data\\')
my_database_info = DatabaseInfo()
my_initializer = Initializer()
# Column selections and names provided by the database description object.
necessary_columns = my_database_info.get_necessary_columns()
marker_column_num = my_database_info.get_marker_column_num()  # get column numbers
force_column_num = my_database_info.get_force_column_num()
all_column_names = my_database_info.get_all_column_names()

for i_sub in range(12):
    print('sub: ' + str(i_sub))
    for i_speed in range(speed_num):
        file_names = my_database_info.get_file_names(sub=i_sub,
                                                     speed=i_speed,
                                                     path=data_path)
        trial_data = DFlowData(file_names[0], file_names[1], file_names[2])
        event_dictionary = trial_data.meta['trial']['events']

        trial_data.clean_data(interpolation_order=2)
        # events: A: Force Plate Zeroing    B: Calibration Pose     C: First Normal Walking
        # D: Longitudinal Perturbation      E: Second Normal Walking    F: Unloaded End

        cali_data_df = trial_data.extract_processed_data(
            event='Calibration Pose')
        cali_data_df.columns = all_column_names
        cali_data_df = cali_data_df[necessary_columns]
        walking_data_df_1 = trial_data.extract_processed_data(
            event='First Normal Walking')
        walking_data_df_1.columns = all_column_names
        walking_data_df_1 = walking_data_df_1[necessary_columns]
        walking_data_df_2 = trial_data.extract_processed_data(