def load_data(event, paths, tmp):
    """Loads an event and processes the data, if necessary, from a trial
    into a GaitData object.

    Parameters
    ==========
    event : string
        A valid event for the given trial.
    paths : list of strings
        The paths to the mocap, record, and meta data files.
    tmp : string
        A path to a temporary directory in which the processed data can be
        stored.

    Returns
    =======
    gait_data : gaitanalysis.gait.GaitData
        The GaitData instance containing the data for the event.

    """
    file_name = '_'.join([n.lower() for n in event.split(' ')]) + '.h5'

    tmp_data_path = os.path.join(tmp, file_name)

    try:
        f = open(tmp_data_path, 'r')
    except IOError:
        print('Cleaning and processing {} data...'.format(event))
        # Load the raw data, clean it up, and extract the perturbation
        # section.
        dflow_data = DFlowData(*paths)
        dflow_data.clean_data(ignore_hbm=True)
        perturbed_df = \
            dflow_data.extract_processed_data(event=event,
                                              index_col='TimeStamp',
                                              isb_coordinates=True)

        # Compute the lower limb 2D inverse dynamics, identify right heel
        # strike times, and split the data into gait cycles.
        gait_data = GaitData(perturbed_df)
        marker_set = dflow_data.meta['trial']['marker-set']
        subject_mass = dflow_data.meta['subject']['mass']
        labels = markers_for_2D_inverse_dynamics(marker_set)
        args = list(labels) + [subject_mass, 6.0]
        gait_data.inverse_dynamics_2d(*args)
        gait_data.grf_landmarks('FP2.ForY', 'FP1.ForY',
                                filter_frequency=10.0,
                                threshold=27.0)
        gait_data.split_at('right', num_samples=80,
                           belt_speed_column='RightBeltSpeed')
        if not os.path.exists(tmp):
            os.makedirs(tmp)
        gait_data.save(tmp_data_path)
    else:
        print('Loading processed {} data from file...'.format(event))
        f.close()
        gait_data = GaitData(tmp_data_path)

    return gait_data
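# A minimal usage sketch for load_data() (illustrative only, not part of the
# original module). The directory names and trial number are assumptions;
# trial_file_paths() is assumed to return the (mocap, record, meta) file
# paths for a trial, as it is used elsewhere in this code base.
paths = trial_file_paths('raw-data', '020')
gait_data = load_data('Longitudinal Perturbation', paths, 'processed-data')
# gait_data now holds the cleaned, split gait cycles for the perturbation
# event and is cached in 'processed-data' for subsequent runs.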
def measured_subject_mass(raw_data_dir, processed_data_dir):
    """Computes the mean mass of each subject based on the force plate data
    collected just after the calibration pose and compares it to the mass
    reported by the subject. Some subjects may have invalid measurements
    and will not be included, so you should make use of the self reported
    mass.

    Parameters
    ----------
    raw_data_dir : string
        The path to the raw data directory.
    processed_data_dir : string
        The path to the processed data directory.

    Returns
    -------
    mean : pandas.DataFrame
        A data frame containing columns with the mean/std measured mass and
        the self reported mass, indexed by subject id.

    """
    # Subject 0 is the null subject. For subject 1 we use the self reported
    # value because there is no "Calibration Pose" event. For subjects 4 and
    # 11 we use the self reported mass because the wooden feet were in place
    # and the force measurements are untrustworthy.
    subj_with_invalid_meas = [0, 1, 4, 11]

    # Some of the trials have anomalies in the data after the calibration
    # pose due to the subjects' movement. The following gives best estimates
    # of the sections of the event that are suitable to use in the subjects'
    # mass computation. The entire time series during the "Calibration Pose"
    # event is acceptable for trials not listed.
    time_sections = {'020': (None, 14.0),
                     '021': (None, 14.0),
                     '031': (-14.0, None),
                     '047': (None, 12.0),
                     '048': (None, 7.0),
                     '055': (-12.0, None),
                     '056': (-3.0, None),  # also the first 2 seconds are good
                     '057': (-8.0, None),
                     '063': (None, 6.0),  # also the last 6 seconds are good
                     '069': (None, 14.0),
                     '078': (None, 15.0)}

    trial_dirs = [x[0] for x in os.walk(raw_data_dir) if x[0][-4] == 'T']
    trial_nums = [x[-3:] for x in trial_dirs if x[-3:] not in ['001', '002']]

    event = 'Calibration Pose'

    tmp_file_name = '_'.join(event.lower().split(' ')) + '.h5'
    tmp_data_path = os.path.join(processed_data_dir, tmp_file_name)

    if not os.path.exists(processed_data_dir):
        os.makedirs(processed_data_dir)

    subject_data = defaultdict(list)

    for trial_number in trial_nums:

        dflow_data = DFlowData(*trial_file_paths(raw_data_dir, trial_number))

        subject_id = dflow_data.meta['subject']['id']

        if subject_id not in subj_with_invalid_meas:

            msg = 'Computing Mass for Trial #{}, Subject #{}'.format(
                trial_number, subject_id)
            print(msg)
            print('=' * len(msg))

            try:
                f = open(tmp_data_path, 'r')
                df = pd.read_hdf(tmp_data_path, 'T' + trial_number)
            except (IOError, KeyError):
                print('Loading raw data files and cleaning...')
                dflow_data.clean_data(ignore_hbm=True)
                df = dflow_data.extract_processed_data(event=event,
                                                       index_col='TimeStamp',
                                                       isb_coordinates=True)
                df.to_hdf(tmp_data_path, 'T' + trial_number)
            else:
                print('Loading preprocessed {} data from file...'.format(event))
                f.close()

            # This is the time varying mass during the calibration pose,
            # computed from the vertical force on both force plates.
            df['Mass'] = (df['FP1.ForY'] + df['FP2.ForY']) / 9.81

            # This sets the slice indices so that only the portion of the
            # time series with valid data is used to compute the mass.
            if trial_number in time_sections:
                start = time_sections[trial_number][0]
                stop = time_sections[trial_number][1]
                if start is None:
                    stop = df.index[0] + stop
                elif stop is None:
                    start = df.index[-1] + start
            else:
                start = None
                stop = None

            valid = df['Mass'].loc[start:stop]

            actual_mass = valid.mean()
            std = valid.std()

            reported_mass = dflow_data.meta['subject']['mass']

            subject_data['Trial Number'].append(trial_number)
            subject_data['Subject ID'].append(subject_id)
            subject_data['Self Reported Mass'].append(reported_mass)
            subject_data['Mean Measured Mass'].append(actual_mass)
            subject_data['Measured Mass Std. Dev.'].append(std)
            subject_data['Gender'].append(dflow_data.meta['subject']['gender'])

            print("Measured mass: {} kg".format(actual_mass))
            print("Self reported mass: {} kg".format(reported_mass))
            print("\n")

    subject_df = pd.DataFrame(subject_data)

    grouped = subject_df.groupby('Subject ID')

    mean = grouped.mean()
    mean['Diff'] = mean['Mean Measured Mass'] - mean['Self Reported Mass']

    # This sets the grouped standard deviation to the correct value
    # following uncertainty propagation for the mean function.
    def uncert(x):
        return np.sqrt(np.sum(x**2) / len(x))

    mean['Measured Mass Std. Dev.'] = \
        grouped.agg({'Measured Mass Std. Dev.': uncert})

    return mean
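# A minimal usage sketch for measured_subject_mass() (illustrative only, not
# part of the original module); the directory names and output file name
# below are assumptions.
mass_df = measured_subject_mass('raw-data', 'processed-data')
print(mass_df[['Self Reported Mass', 'Mean Measured Mass', 'Diff']])
mass_df.to_csv('subject-mass-comparison.csv')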
import os

import numpy as np
import matplotlib.pyplot as plt
from gaitanalysis.motek import DFlowData  # assumed import path for DFlowData

# load() is a helper defined elsewhere in this script; it is expected to
# return the (mocap, record, meta) file paths for a given trial index.

script_path = os.path.realpath(__file__)
src_dir = os.path.dirname(script_path)
root_dir = os.path.realpath(os.path.join(src_dir, '..'))
# The computed root_dir is overridden with the raw data directory of the
# perturbed-data-paper repository.
root_dir = '../perturbed-data-paper/raw-data'

mass = []
height = []
age = []
gender = []

# Trials 6-35 and 40-80 are processed.
files = list(range(6, 36)) + list(range(40, 81))

for index in files:
    dflow_data = DFlowData(*load(index, root_dir))
    if 'subject' in dflow_data.meta and 'mass' in dflow_data.meta['subject']:
        mass.append(dflow_data.meta['subject']['mass'])
        height.append(dflow_data.meta['subject']['height'])
        age.append(dflow_data.meta['subject']['age'])
        gender.append(dflow_data.meta['subject']['gender'])

print(np.mean(mass))
print(np.mean(height))

plt.figure(1)

x, y = getPDF(mass)
plt.subplot(221)
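# getPDF() is not defined in this excerpt. Below is a plausible sketch,
# assuming it returns evaluation points and a kernel density estimate of the
# sample; this is an assumption, not the original helper, and it would need
# to be defined before the plotting code above.
from scipy.stats import gaussian_kde

def getPDF(sample, num_points=100):
    """Return x values and an estimated probability density for the sample."""
    sample = np.asarray(sample, dtype=float)
    kde = gaussian_kde(sample)
    x = np.linspace(sample.min(), sample.max(), num_points)
    return x, kde(x)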
def DataMoore(trials):
    antropometric = []
    events = ['First Normal Walking',
              'Longitudinal Perturbation',
              'Second Normal Walking']
    for j in events:
        if not os.path.exists(directory + '/' + j):
            os.makedirs(directory + '/' + j)
        os.chdir(directory + '/' + j)
        for i in trials:
            trial = i
            Datadir = '/home/eprietop/Documents/perturbed-data-paper/raw-data/T0%s/' % trial
            data = DFlowData(Datadir + 'mocap-0%s.txt' % trial,
                             Datadir + 'record-0%s.txt' % trial,
                             Datadir + 'meta-0%s.yml' % trial)
            # Set the variables according to the markers and forces
            # referenced in the paper.
            try:
                mass = data.meta['subject']['mass']
                height = data.meta['subject']['height']
            except KeyError:
                mass = np.nan
                height = np.nan
            # Record each trial's anthropometry once.
            if [trial, mass, height] not in antropometric:
                antropometric.append([trial, mass, height])
            # List of markers for the lower limb inverse dynamics.
            body_parts = ['SHO', 'GTRO', 'LEK', 'LM', 'HEE', 'MT5']
            body_parts_l = ['L' + c + e for c in body_parts
                            for e in ['.PosX', '.PosY']]
            body_parts_r = ['R' + c + e for c in body_parts
                            for e in ['.PosX', '.PosY']]
            # Lists of force plate channels.
            forces_l = ['FP1' + c for c in ['.ForX', '.ForY', '.MomZ']]
            forces_r = ['FP2' + c for c in ['.ForX', '.ForY', '.MomZ']]
            data.clean_data()
            event_df = data.extract_processed_data(event=j)
            Gaitdata = gait.GaitData(event_df)
            Gaitdata.inverse_dynamics_2d(body_parts_l, body_parts_r,
                                         forces_l, forces_r, mass, 6.0)
            # A threshold of 20 N was needed for reliable landmark detection.
            cycles = Gaitdata.grf_landmarks(forces_r[1], forces_l[1],
                                            threshold=20.0,
                                            do_plot=False)
            cycles_df = pd.DataFrame(list(cycles),
                                     index=['R_strike', 'L_strike',
                                            'R_off', 'L_off'])
            cycles_df.to_csv('Cycle_detection %s' % trial)
            # Save the processed inverse dynamics columns.
            Process_data = event_df.iloc[:, -31:].round(4)
            Process_data.to_csv('ID' + '_0%s' % trial)
            GRF_Y = pd.concat((event_df[forces_r[1]], event_df[forces_l[1]]),
                              axis=1)
            GRF_Y.to_csv('GRF %s' % trial)
        os.chdir("../")
    antro = pd.DataFrame(antropometric,
                         columns=['trial', 'mass [kg]', 'height [m]'])
    antro.to_csv('trial-anthropometries')
    os.chdir("../")
    return
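# A minimal usage sketch for DataMoore() (illustrative only, not part of the
# original script). The output directory and trial identifiers are
# assumptions; trial numbers are passed as two-digit strings because the
# function builds names such as 'T0%s' and 'mocap-0%s.txt' from them.
directory = os.path.abspath('moore-processed')  # module-level global used by DataMoore
DataMoore(['20', '21'])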
# DatabaseInfo and Initializer come from this project's own modules;
# DFlowData is provided by the GaitAnalysisToolKit (assumed import path).
from gaitanalysis.motek import DFlowData

processed_data_path = ('D:\\Tian\\Research\\Projects\\ML Project\\'
                       'gait_database_processed\\GaitDatabase\\data\\')

my_database_info = DatabaseInfo()
my_initializer = Initializer()
necessary_columns = my_database_info.get_necessary_columns()
# Get the marker and force column numbers and the full column name list.
marker_column_num = my_database_info.get_marker_column_num()
force_column_num = my_database_info.get_force_column_num()
all_column_names = my_database_info.get_all_column_names()

# speed_num and data_path are defined elsewhere in this script.
for i_sub in range(12):
    print('sub: ' + str(i_sub))
    for i_speed in range(speed_num):
        file_names = my_database_info.get_file_names(sub=i_sub,
                                                     speed=i_speed,
                                                     path=data_path)
        trial_data = DFlowData(file_names[0], file_names[1], file_names[2])
        event_dictionary = trial_data.meta['trial']['events']
        trial_data.clean_data(interpolation_order=2)
        # Events: A: Force Plate Zeroing, B: Calibration Pose,
        # C: First Normal Walking, D: Longitudinal Perturbation,
        # E: Second Normal Walking, F: Unloaded End
        cali_data_df = trial_data.extract_processed_data(
            event='Calibration Pose')
        cali_data_df.columns = all_column_names
        cali_data_df = cali_data_df[necessary_columns]
        walking_data_df_1 = trial_data.extract_processed_data(
            event='First Normal Walking')
        walking_data_df_1.columns = all_column_names
        walking_data_df_1 = walking_data_df_1[necessary_columns]
        walking_data_df_2 = trial_data.extract_processed_data(
            event='Second Normal Walking')
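        # An assumed continuation (illustrative only; the original excerpt is
        # truncated above): mirror the column handling used for the other
        # events and write the extracted frames to processed_data_path. The
        # file name pattern is an assumption.
        walking_data_df_2.columns = all_column_names
        walking_data_df_2 = walking_data_df_2[necessary_columns]
        for name, df in [('calibration_pose', cali_data_df),
                         ('first_normal_walking', walking_data_df_1),
                         ('second_normal_walking', walking_data_df_2)]:
            out_file = processed_data_path + 'sub{}_speed{}_{}.csv'.format(
                i_sub, i_speed, name)
            df.to_csv(out_file)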