def logreg_algo(trainingdataset='', traininglabels='', testingdataset='', testinglabels='', logger='',
                quickrun=True):
    """
    Train Logistic Regression model(s) and hand every trained model to results().

    With quickrun a single model with the library default hyper-parameters is trained.
    Otherwise one model is trained for every combination of tolerance and inverse
    regularization strength of a small, fixed grid.

    :param trainingdataset: training samples passed to LogisticRegression.fit
    :param traininglabels: training labels; flattened with ravel() before fitting
    :param testingdataset: testing samples, forwarded to results()
    :param testinglabels: testing labels, forwarded to results()
    :param logger: object exposing getLogger(name) (e.g. the logging module)
    :param quickrun: (bool) train one default model instead of the parameter grid
    """
    if quickrun:
        logger.getLogger('tab.regular.time').info('starting training Logistic Regression Model.')

        logreg_model = LogisticRegression(n_jobs=-1, verbose=True)
        logreg_model = logreg_model.fit(trainingdataset, traininglabels[:].ravel())

        results(logreg_model=logreg_model, trainingdataset=trainingdataset, traininglabels=traininglabels,
                testingdataset=testingdataset, testinglabels=testinglabels, logger=logger)
        return

    max_iterations = 200
    number_of_jobs = -1

    for t in [0.0001, 0.00001, 0.000001, 0.0000001]:
        for c in [10, 100]:
            logger.getLogger('tab.regular.time').info('starting training Logistic Regression Model.')

            msg = '\tmodel parameters: \n ' \
                  '\t\tInverse of regularization strength:{0}' \
                  '\t\ttolerance:{1}' \
                  '\t\tmax iterations:{2}' \
                  '\t\tnumber of jobs:{3}'.format(c, t, max_iterations, number_of_jobs)
            printout(message=msg, verbose=True)

            logreg_model = LogisticRegression(C=c, tol=t, max_iter=max_iterations, n_jobs=number_of_jobs,
                                              verbose=True)
            # labels are flattened exactly as in the quickrun branch so both paths fit on a 1-D target
            logreg_model = logreg_model.fit(trainingdataset[:], traininglabels[:].ravel())

            # the logger is forwarded here as well, matching the quickrun branch
            results(logreg_model=logreg_model, trainingdataset=trainingdataset, traininglabels=traininglabels,
                    testingdataset=testingdataset, testinglabels=testinglabels, logger=logger)
def window_step_properties():
    """
    Ask the user for the window and step size used by the featurization step.

    The window size is obtained through window_property(). The step size prompt is
    repeated until a positive integer is entered; an empty answer selects the default
    step size of 1.

    :return: (list) window size, step size
    """
    window_size = window_property()

    while True:
        step_size = raw_input('step size: ').upper()

        # an empty answer means the default step size
        if step_size == '':
            step_size = 1

        try:
            step_size = int(step_size)
        except ValueError as err_message:
            # not a number: report it and ask again
            printout(message=err_message)
            continue

        if step_size > 0:
            break

        printout('Wrong number of step size. Select step size greater than 0')

    return [window_size, step_size]
def select_dataset_quickrun(algorithm=''):
    """
    Ask the user for the dataset location, the quickrun option and, for the HMM based
    algorithms, the additional HMM settings.

    :param algorithm: (str) selected algorithm; 'GHMM' and 'GMMHMM' trigger the extra prompts
    :return:
        if GHMM or GMMHMM: file path, quickrun, kmeans, batch, window size, number of states
        else: file path, quickrun
    :raises ValueError: if the number of HMM states is not an integer or is not 8, 10 or 20
    """
    # get the right dataset location
    file_path = get_set_dataset_location(matlab_or_dataset='dataset', default_folder='processed_dataset',
                                         action='get')

    # quickrun runs only the short version of the selected algorithm rather than the
    # algorithm with multiple parameters
    quickrun = ''
    while quickrun == '':
        quickrun_selection = raw_input('Quickrun: ').upper()
        print('')

        # an empty answer defaults to a quick run
        if quickrun_selection in ('', 'TRUE', 'T'):
            quickrun = True
        elif quickrun_selection in ('FALSE', 'F'):
            quickrun = False
        else:
            msg = 'Error. Wrong option for quickrun selected.'
            printout(message=msg, verbose=True)

    if algorithm != 'GHMM' and algorithm != 'GMMHMM':
        return file_path, quickrun

    kmeans = raw_input('kmeans (regular or mini): ').upper()
    if kmeans == '':
        kmeans = 'REGULAR'

    # anything but an empty answer or 'FALSE' enables the batch setting
    batch = raw_input('batch setting: ').upper() not in ('', 'FALSE')

    window_size = window_property()

    # the raw answer is checked for the default *before* the integer conversion;
    # converting first would raise on an empty answer and make the default unreachable
    n_states_selection = raw_input('Number of HMM states: ').strip()
    if n_states_selection == '':
        n_states = 8
    else:
        n_states = int(n_states_selection)

    if n_states not in (8, 10, 20):
        raise ValueError('Wrong state number')

    return file_path, quickrun, kmeans, batch, window_size, n_states
def run(self):
    """
    Pre-process self.dataset and flag the work as finished.

    The result of hmm_preprocessing_data() is stored in self.dataset_normalized and
    self.labels; self.thread_done is set to True once everything is done. A timestamped
    message is printed before and after the pre-processing.
    """
    timestamp_format = '%Y-%m-%d %H:%M:%S'

    started_at = datetime.now().strftime(timestamp_format)
    start_message = 'Pre-processing {0}. Data:{1}'.format(self.name, started_at)
    printout(message=start_message, verbose=True)

    normalized, labels = hmm_preprocessing_data(dataset=self.dataset)
    self.dataset_normalized = normalized
    self.labels = labels

    finished_at = datetime.now().strftime(timestamp_format)
    end_message = 'Finished pre-processing {0}. Data:{1}'.format(self.name, finished_at)
    printout(message=end_message, verbose=True)

    self.thread_done = True
def load_data(data_dir):
    """
    Load every array file of a directory into one array, stacked along the first axis.

    :param data_dir: directory holding the files readable by np.load
    :return: (tuple) stacked sensor data array, list of UserInfo objects holding the
        user, the activity and the start/end row of every file inside the stacked array
    """
    # dataset user information
    dataset_user_information = list()

    # arrays are collected here and merged once at the end; appending to the growing
    # array inside the loop would copy all previously loaded rows for every file
    loaded_arrays = list()
    total_rows = 0

    # loop through every file stored within the dataset folder
    for python_file in os.listdir(data_dir):
        python_file_path = os.path.join(data_dir, python_file)
        print('reading file: {0}'.format(python_file))

        # getting file information
        user, activity = file_information(python_file)
        start_index = total_rows

        # read data from file
        raw_data = np.load(python_file_path)

        if start_index == 0:
            # no rows collected so far: this file becomes the start of the dataset
            loaded_arrays = [raw_data]
        else:
            loaded_arrays.append(raw_data)

        # row where the data of this file ends inside the merged dataset
        end_index = start_index + np.shape(raw_data)[0]
        total_rows = end_index

        user_prop = UserInfo(user=user, activity=activity, start_index=start_index, end_index=end_index)
        dataset_user_information.append(user_prop)

        print('\tuser={0} activity={1}, start/end index={2}'.format(user, activity, (start_index, end_index)))
        printout(message='\tdata stored in dataframe\n', verbose=True)

    if loaded_arrays:
        sensordata_array = np.concatenate(loaded_arrays, axis=0)
    else:
        # empty directory
        sensordata_array = np.empty(shape=(0, 0))

    return sensordata_array, dataset_user_information
def load_data(data_dir):
    """
    Load every array file of a directory into a single dataframe.

    :param data_dir: directory holding the files readable by np.load
    :return: (tuple) dataframe with the rows of all the files (index and columns
        renumbered from 0), list of UserInfo objects holding the user, the activity and
        the start/end row of every file inside the dataframe
    """
    # dataset user information
    dataset_user_information = list()

    # per-file dataframes are merged with a single pd.concat after the loop:
    # DataFrame.append copies the whole frame on every call and is no longer
    # available in recent pandas releases
    file_dataframes = list()
    total_rows = 0

    # loop through every file stored within the dataset folder
    for python_file in os.listdir(data_dir):
        python_file_path = os.path.join(data_dir, python_file)
        msg = 'reading file: {0}'.format(python_file)
        printout(message=msg, verbose=True)

        # getting file information
        user, activity = file_information(python_file)
        start_index = total_rows

        # read data from file and convert the array to dataframe
        raw_data = np.load(python_file_path)
        df_data = pd.DataFrame(raw_data)
        file_dataframes.append(df_data)

        end_index = start_index + df_data.shape[0]
        total_rows = end_index

        user_prop = UserInfo(user=user, activity=activity, start_index=start_index, end_index=end_index)
        dataset_user_information.append(user_prop)

        print('\tuser={0} activity={1}, start/end index={2}'.format(user, activity, (start_index, end_index)))
        printout(message='\tdata stored in dataframe\n', verbose=True)

    if file_dataframes:
        sensordata_dataframe = pd.concat(file_dataframes)
    else:
        # empty directory: pd.concat refuses an empty list
        sensordata_dataframe = pd.DataFrame()

    # renumber rows and columns consecutively
    sensordata_dataframe.index = range(0, sensordata_dataframe.shape[0])
    sensordata_dataframe.columns = range(0, sensordata_dataframe.shape[1])

    return sensordata_dataframe, dataset_user_information
def move_matlab():
    """
    moves and organizes matlab files based on a hierarchical folder with the activity as
    parent and then on users

    The source directory is requested through get_set_dataset_location(). The destination
    directory is requested until a usable one is provided:
        - empty answer: 'sensordata' inside program_path
        - path of an existing file: the answer joined to program_path
        - anything else: the answer itself (created if it does not exist yet)
    """
    # default option is the dropbox directory
    initial_path = get_set_dataset_location(matlab_or_dataset='matlab',
                                            default_folder='/Users/jguerra/Dropbox/SensorJorge')

    forwarding_error = True
    while forwarding_error:

        forwarding_path = raw_input('Matlab forwarding directory: ')
        print(forwarding_path)

        if forwarding_path == "":
            # get location of program
            final_path = os.path.join(program_path, 'sensordata')
            print(final_path)
        elif os.path.isfile(forwarding_path):
            final_path = os.path.join(program_path, forwarding_path)
        else:
            # existing directory, or a new directory that is created below; without this
            # branch a not yet existing directory left final_path undefined
            final_path = forwarding_path

        try:
            # if does not exists, create it
            if not os.path.exists(final_path):
                os.makedirs(final_path)
        except OSError:
            msg = 'Error. {0} directory cannot be created.'.format(final_path)
            printout(message=msg, verbose=True)
            msg = 'Please chose a different forwarding directory.'
            printout(message=msg, verbose=True)
        else:
            forwarding_error = False

    msg = 'starting moving matlab files from {0} to {1}'.format(initial_path, final_path)
    logging.getLogger('').info(msg)

    # the files go to the resolved destination (final_path); the raw answer may be empty
    # or relative and does not match the directory reported in the log messages
    move_matlab_files(initial_path, final_path)

    msg = 'finished moving matlab files from {0} to {1}'.format(initial_path, final_path)
    logging.getLogger('').info(msg)
def get_set_dataset_location(matlab_or_dataset, default_folder='', action=''):
    """
    Gets the filename of the datasets or the directory of the matlab files

    The user is asked again until a valid answer is given: an existing directory for the
    matlab files, a non-empty name for the dataset file.

    :param matlab_or_dataset: (str) 'matlab' asks for the matlab directory, any other value
        asks for a dataset filename
    :param default_folder: (str) folder, relative to program_path, where the dataset file is
        located. Only used for datasets; when empty the file is placed directly in program_path
    :param action: (str) 'get' asks for an input dataset, any other value for an output dataset
    :return:
        if dataset: path of the '.hdf5' file
        if matlab: directory
    """
    while True:
        if matlab_or_dataset == 'matlab':
            file_directory = raw_input('Matlab directory: ')

            # isdir is False for paths that do not exist, no separate existence check needed
            if os.path.isdir(file_directory):
                return file_directory

            msg = 'Error. Wrong directory provided. Please, provide correct directory.'
            printout(message=msg, verbose=True)
            continue

        if action == 'get':
            filename = raw_input('Input dataset filename: ')
        else:
            filename = raw_input('Output dataset filename: ')

        if filename == "":
            msg = 'Error. No filename provided. Please, insert filename.'
            printout(message=msg, verbose=True)
            continue

        # the folder is optional: without it the path used to be left undefined
        folder_filename = filename + '.hdf5'
        if default_folder:
            folder_filename = os.path.join(default_folder, folder_filename)

        # location relative to the program
        return os.path.join(program_path, folder_filename)
def window_property():
    """
    Ask the user for the window size until a supported value is entered.

    Supported window sizes are 30, 60 and 120; an empty answer selects the default of 60.

    :return: (int) window size
    """
    supported_sizes = (30, 60, 120)

    while True:
        window_size = raw_input('window size: ')

        # default
        if window_size == '':
            window_size = 60

        try:
            window_size = int(window_size)
        except ValueError as err_message:
            # not a number: report it and ask again
            printout(message=err_message)
            continue

        if window_size in supported_sizes:
            return window_size

        printout('Wrong number of window size. Options:30,60 or 120')
def imu_algorithm(doc, algorithm='', quickrun='', logger='', kmeans='', window_size='', n_states='',
                  batched_setting=False):
    """
    Run the selected algorithm once for every control ('pilot') recording of the input file.

    Every dataset of the HDF5 file whose key contains 'pilot' and does not contain 'feeding'
    is used to initialize a base.Base object. The datasets of the other users performing the
    same activity (keys without 'paretic' and 'feeding') are added to that object through
    add_dataset() before the algorithm is run on it.

    :param doc: object providing input_path and input_path_filename (path of the HDF5 file)
    :param algorithm: (str) 'GHMM' or 'GMMHMM'; any other value only prints an error message
    :param quickrun: forwarded to hmm_algo
    :param logger: object exposing getLogger(name) (e.g. the logging module)
    :param kmeans: forwarded to hmm_algo
    :param window_size: forwarded to base.Base
    :param n_states: forwarded to base.Base and hmm_algo
    :param batched_setting: forwarded to hmm_algo
    """
    h5_file_object = h5py.File(doc.input_path_filename, 'r')

    try:
        # printing a line for style and visibility
        printout(message='', verbose=True)

        # total number of users being considered
        total_inner_users = len(h5_file_object) - 1

        for user_info in h5_file_object.iterkeys():

            # only consider control users that are not performing feeding activity
            if ('pilot' not in user_info) or ('feeding' in user_info):
                continue

            # get user and activity
            user = h5_file_object[user_info].attrs['user']
            activity = h5_file_object[user_info].attrs['activity']

            # initialize user object
            base_object = base.Base(input_path=doc.input_path, filename=user_info, user=user, activity=activity,
                                    dataset=h5_file_object[user_info], window_size=window_size,
                                    n_states=n_states)

            msg = 'Starting analysing {0}'.format(user_info)
            logger.getLogger('regular.time').info(msg)

            msg = 'Calculating training and testing dataset'
            logger.getLogger('regular.time').info(msg)

            # fetch the remaining data from the objects without:
            #   1. the dataset of the current user
            #   2. other dataset with a different activity
            for u_index, user_info_inner in enumerate(h5_file_object.iterkeys()):
                # get the attributes of the candidate dataset
                inner_user = h5_file_object[user_info_inner].attrs['user']
                inner_activity = h5_file_object[user_info_inner].attrs['activity']

                # add the activities for the other control users that are not performing
                # the feeding activity
                adding = (inner_user != user) and ('paretic' not in user_info_inner) and \
                    ('feeding' not in user_info_inner) and (activity == inner_activity)

                # string to print for logging information
                print_str = '{0} (user index {1} of {2})'.format(user_info_inner, u_index, total_inner_users)

                if adding:
                    base_object.add_dataset(dataset=h5_file_object[user_info_inner])
                    msg = 'Including {0}'.format(print_str)
                else:
                    msg = 'Excluding {0}'.format(print_str)
                logger.getLogger('tab.regular').info(msg)

            # logging information
            for dataset_type in ['Training', 'Testing']:
                data_size, label_size = base_object.get_shape(dataset_type)
                msg = '{0} data size:{1}'.format(dataset_type, data_size)
                logger.getLogger('line.tab.regular').info(msg)
                # second placeholder is the label size (the dataset type used to be printed twice)
                msg = '{0} labels size:{1}'.format(dataset_type, label_size)
                logger.getLogger('tab.regular').info(msg)
            logger.getLogger('tab.regular').info('')

            try:
                # NOTE: only the HMM based algorithms are wired in here
                if algorithm == 'GHMM' or algorithm == 'GMMHMM':
                    hmm_algo(base_object=base_object, algorithm=algorithm, batched_setting=batched_setting,
                             logger=logger, kmeans=kmeans, quickrun=quickrun, n_states=n_states)
                else:
                    printout(message='Wrong algorithm provided.', verbose=True)

                msg = 'Finished analysing {0}'.format(user_info)
                logger.getLogger('tab.regular.time.line').info(msg)

            except ValueError as error_message:
                # a failing user is reported and the next one is analysed
                msg = 'Error while analysing {0}'.format(user_info)
                logger.getLogger('tab.regular.time').error(msg)
                logger.getLogger('tab.regular.time.line').error(error_message)

            # release the resources held by the user object
            base_object.close_and_delete()

    finally:
        # the input file is read-only and not needed once every user was analysed
        h5_file_object.close()
def _collect_sensor_columns(dataset, motion_class):
    """Place the sensor values of every used vector and joint/segment side by side (column-wise)."""
    sensor_blocks = list()

    for vector in motion_class.vectorsUsed:
        v_data = dataset[vector]

        # the 'joint' vector is indexed by joint, every other vector by segment
        if 'joint' == vector:
            sensor_names = motion_class.jointUsed
        else:
            sensor_names = motion_class.segmentUsed

        for sensor_name in sensor_names:
            # values of the sensor; the first two rows are not used
            sensor_blocks.append(v_data[0][0][sensor_name][0][0][2:])

    if not sensor_blocks:
        # nothing selected: placeholder array
        return np.empty((1, 1))

    if len(sensor_blocks) == 1:
        return np.array(sensor_blocks[0])

    # several blocks are always merged into a float64 array
    return np.concatenate(sensor_blocks, axis=1).astype(np.float64, copy=False)


def extract_data_and_save_to_file(labels_array='', ignored_indices='', dataset='', motion_class='',
                                  dataset_path='', current_file_name=''):
    """
    Merge the sensor data of one recording with its labels and save it with np.save.

    The saved array holds, per row, the sensor values, the label and a last column whose
    first row contains the number of rows of the recording (zero elsewhere). Files already
    present in dataset_path whose name contains the same user and activity are loaded and
    placed in front of the new data.

    :param labels_array: labels, one per row of the sensor data
    :param ignored_indices: row ranges removed through remove_ignores(); nothing is removed if empty
    :param dataset: structure indexed by vector name holding the sensor data
    :param motion_class: object providing vectorsUsed, jointUsed and segmentUsed
    :param dataset_path: directory where the output file is stored
    :param current_file_name: name of the recording, parsed by file_information()
    """
    # all the segments and vectors values, side by side
    data = _collect_sensor_columns(dataset=dataset, motion_class=motion_class)

    # merge data with their respective labels
    try:
        printout(message='\tMerging data and labels arrays', verbose=True)
        tmp_arr = np.c_[data, labels_array]
    except ValueError:
        msg = '\tsize of data: {0}'.format(np.shape(data))
        printout(message=msg, verbose=True)
        msg = '\tsize of labels: {0}'.format(np.shape(labels_array))
        printout(message=msg, verbose=True, extraspaces=2)
        exit(1)

    if len(ignored_indices) != 0:
        printout(message='\tRemoving \'Ignored\' labels', verbose=True)
        data_labels = remove_ignores(tmp_arr, ignored_indices)
    else:
        data_labels = tmp_arr

    # this information will be used to train the hmm since its important to know the start and end of
    # an activity in order for the EM algo to not learn from non-concurrent activities
    n_datapoints = np.shape(data_labels)[0]
    array_length = np.zeros([n_datapoints, 1])
    if n_datapoints:
        array_length[0, 0] = n_datapoints
    output_data = np.c_[data_labels, array_length]

    # current user and activity based on the file name
    user, activity, leftright = file_information(current_file_name)

    # concatenate users performing the same activities
    for pfile in os.listdir(dataset_path):
        pfile_path = os.path.join(dataset_path, pfile)
        if os.path.isfile(pfile_path) and user in pfile and activity in pfile:
            # listdir returns bare names, the file has to be loaded from dataset_path
            old_data = np.load(pfile_path)
            output_data = np.concatenate((old_data, output_data), axis=0)

    new_file_name = user + '_' + leftright + '_' + activity
    current_out_path = os.path.join(dataset_path, new_file_name)
    msg = '\tOutput file directory: {0}'.format(current_out_path)
    printout(message=msg, verbose=True)
    np.save(current_out_path, output_data)
def extract_mat_information(doc, matlab_directory, action, leftright_arm, script_path, output_path_filename='',
                            pareticnonparetic=''):
    """
    Extracts the relevant information about the directories of the matlab files being considered

    updates the following attributes of doc:
        doc.input_path: directory holding one folder per activity
        doc.output_path, doc.output_path_filename: only when action is 'extract'
        doc.activity_list: name of the activity folders
        doc.matlab_files_names_dict[activity]: key is the activity and the value is the list of
            the matlab files' names
        doc.matlab_files_path_dict[activity]: key is the activity and the value is the list of
            the matlab files' paths
        doc.count: increased by one for every matlab file considered

    :param doc: object holding the attributes listed above
    :param matlab_directory: directory of the matlab files; if empty, 'SensorData' inside
        script_path is used
    :param action: (str) 'extract' additionally prepares the output directory
    :param leftright_arm: (str) text the file name has to contain; if empty every matlab file is used
    :param script_path: working directory of the program
    :param output_path_filename: stored in doc.output_path_filename when extracting
    :param pareticnonparetic: (str) text the file name of an experimental patient has to contain;
        only files whose user is listed in MatlabLabels().s_patients_dexterity[leftright_arm] are used
    """
    working_path = script_path

    if matlab_directory == "":
        # including folder SensorData where all the matlab files are located
        doc.input_path = os.path.join(working_path, 'SensorData')
    else:
        doc.input_path = matlab_directory

    # check input path
    if not os.path.exists(doc.input_path):
        # report the path that was actually checked
        msg = "File " + doc.input_path + " does not exist"
        printout(message=msg, verbose=True)
        exit(1)

    if action == 'extract':
        doc.output_path = os.path.join(working_path, 'converted_dataset')
        if not os.path.exists(doc.output_path):
            os.makedirs(doc.output_path)
        doc.output_path_filename = output_path_filename

    # all the activities i.e. Shelf_High_Heavycan, Shelf_Low_Heavycan, etc...
    doc.activity_list = next(os.walk(doc.input_path))[1]

    # used for experimental patients
    specific_patients = ()
    if pareticnonparetic:
        # set of motions and labels
        motion_class = MatlabLabels()
        # get list of right or left dexterity user
        specific_patients = motion_class.s_patients_dexterity[leftright_arm]

    # NOTE(review): '[nonparetic|paretic]+' and '[active|nonactive]+' are character classes, not
    # alternations, so they accept any mix of those letters; left untouched because the set of
    # real file names cannot be verified from here
    upattern = r'(^[A-Z]+[0-9]+)_[nonparetic|paretic]+_[active|nonactive]+_([a-z]+_high|[a-z]+_low|[a-z]+).*\.mat$'

    # loop through activities
    for activity in doc.activity_list:
        matlab_path_list = list()
        matlab_files_list = list()

        # obtain the path based on that activity
        activity_path = os.path.join(doc.input_path, activity)

        # loop through each subject of the current activity
        for (subject_path, _, matlab_file_list) in os.walk(activity_path, topdown=False):
            for matlab_file in matlab_file_list:

                # only matlab files that are not hidden files
                if '.mat' not in matlab_file or matlab_file.startswith('.'):
                    continue

                if leftright_arm == "" or leftright_arm in matlab_file:
                    # checking files (no arm given) or extracting control patients files
                    add_file = True
                elif pareticnonparetic and pareticnonparetic in matlab_file:
                    # extracting experimental patients files: the user has to be in the
                    # right or left side list; names that do not follow the pattern are skipped
                    user_information = re.match(pattern=upattern, string=matlab_file)
                    add_file = user_information is not None and \
                        user_information.group(1) in specific_patients
                else:
                    add_file = False

                if add_file:
                    # full matlab path
                    matlab_path_list.append(os.path.join(subject_path, matlab_file))
                    matlab_files_list.append(matlab_file)
                    doc.count += 1

        # add the respective matlab files to their specific activities
        doc.matlab_files_path_dict[activity] = matlab_path_list
        doc.matlab_files_names_dict[activity] = matlab_files_list
def _collect_sensor_columns(dataset, motion_class):
    """Place the sensor values of every used vector and joint/segment side by side (column-wise)."""
    sensor_blocks = list()

    for vector in motion_class.vectorsUsed:
        v_data = dataset[vector]

        # the 'joint' vector is indexed by joint, every other vector by segment
        if 'joint' == vector:
            sensor_names = motion_class.jointUsed
        else:
            sensor_names = motion_class.segmentUsed

        for sensor_name in sensor_names:
            # values of the sensor; the first two rows are not used
            sensor_blocks.append(v_data[0][0][sensor_name][0][0][2:])

    if not sensor_blocks:
        # nothing selected: placeholder array
        return np.empty((1, 1))

    if len(sensor_blocks) == 1:
        return np.array(sensor_blocks[0])

    # several blocks are always merged into a float64 array
    return np.concatenate(sensor_blocks, axis=1).astype(np.float64, copy=False)


def extract_data_and_save_to_file(labels_array='', ignored_indices='', dataset='', motion_class='',
                                  outfile_object='', current_file_name=''):
    """
    Merge the sensor data of one recording with its labels and store it in the output file.

    The merged array (sensor values followed by the label column) is stored in
    outfile_object as a dataset named after current_file_name; add_attributes() is then
    called on the new dataset.

    :param labels_array: labels, one per row of the sensor data
    :param ignored_indices: row ranges removed through remove_ignores(); nothing is removed if empty
    :param dataset: structure indexed by vector name holding the sensor data
    :param motion_class: object providing vectorsUsed, jointUsed and segmentUsed
    :param outfile_object: open output file providing create_dataset() and item access
    :param current_file_name: name of the recording, used as the dataset name
    """
    # all the segments and vectors values, side by side
    data = _collect_sensor_columns(dataset=dataset, motion_class=motion_class)

    # merge data with their respective labels
    try:
        printout(message='\tMerging data and labels arrays', verbose=True)
        tmp_arr = np.c_[data, labels_array]
    except ValueError:
        msg = '\tsize of data: {0}'.format(np.shape(data))
        printout(message=msg, verbose=True)
        msg = '\tsize of labels: {0}'.format(np.shape(labels_array))
        printout(message=msg, verbose=True, extraspaces=2)
        exit(1)

    if len(ignored_indices) != 0:
        printout(message='\tRemoving \'Ignored\' labels', verbose=True)
        data_labels = remove_ignores(tmp_arr, ignored_indices)
    else:
        data_labels = tmp_arr

    outfile_object.create_dataset(name=current_file_name, data=data_labels)
    add_attributes(outfile_object[current_file_name], current_file_name)
def data_collection(file_properties, debugging, extract, logger): # set of motions and labels motion_class = MatlabLabels() # flags used for headers in the log file first_pass_ever = True if extract: out_file = h5py.File(file_properties.output_path_filename, 'w') # loop through each activity for activity, matlab_file_list in file_properties.matlab_files_path_dict.iteritems( ): # used for logging information first_pass_activity = True # loop through all the matlab files for index_matlab_file, matlab_file in enumerate(matlab_file_list): # get name of matlab file matlab_file_name = file_properties.matlab_files_names_dict[ activity][index_matlab_file] msg = 'On activity: {0}'.format(activity) logger.getLogger('regular').info(msg) msg = 'Accessing file: {0}'.format(matlab_file_name) logger.getLogger('regular').info(msg) if matlab_file_name not in matlab_file: error_msg = 'Fatal Error. missing file={0} for activity={1}'.format( matlab_file_name, activity) logger.getLogger('regular').error(error_msg) if not debugging: raise ValueError(error_msg) # keep track of error within specific files temp_log_file_content = list() # variable to know whether matlab was access correctly matlab_access = True # load matlab file content try: matlab_content = sio.loadmat(matlab_file) except ValueError as v_error_msg: error_msg = 'File {0} cannot be accessed\n {1}'.format( matlab_file_name, v_error_msg) error_message_func(logger=logger, error_message=error_msg, debugging=debugging) matlab_access = False if matlab_access: msg = 'Matlab content has been loaded' logger.getLogger('tab.regular').info(msg) if not ('tree' in matlab_content): error_msg = 'Fatal error. tree structure does not exists.' 
error_message_func(error_message=error_msg, debugging=debugging, logger=logger) else: printout(message='\ttree structure exists', verbose=True) # this is temporary because if the specific file has no errors then no information # about the file will be written temp_log_file_content = list() # tree2 contains the sensor data tree2 = True if not ('tree2' in matlab_content): error_msg = 'Fatal error. tree2 structure does not exists.' error_message_func(error_message=error_msg, debugging=debugging, logger=logger) tree2 = False else: print '\ttree2 structure exists' # markerExtract contains the label data marker_extract = True # check for marketExtract if not ('markerExtract' in matlab_content): error_msg = 'Fatal error. MarkerExtract structure does not exists.\n' error_message_func(error_message=error_msg, debugging=debugging, logger=logger) marker_extract = False if not debugging: raise ValueError(error_msg) else: printout(message='\tMarkerExtract structure exists', verbose=True) # structure where the labels are store if marker_extract: if 'paretic' in matlab_file_name: if '_paretic_' in matlab_file_name: right_left_paretic_nonparetic_hand_expectation = 'P_' elif '_nonparetic_' in matlab_file_name: right_left_paretic_nonparetic_hand_expectation = 'N_' else: print 'Failed to distinguished between paretic or nonparetic patient.' exit(1) else: if '_r_' in matlab_file_name: right_left_paretic_nonparetic_hand_expectation = 'R_' elif '_l_' in matlab_file_name: right_left_paretic_nonparetic_hand_expectation = 'L_' else: print 'Failed to distinguished between right or left hand patient.' 
exit(1) # get label data printout(message='\tAccessing MarkerExtract data', verbose=True) # row_data[0][0] = label # row_data[1][0][0] = time step data_array = matlab_content['markerExtract'] # use for checking incoming labels start_flag = True end_flag = False wrong_begin_end_label = False previous_label = '' expected_label = '' # used to overcome index roadblocks first_pass = True # keep track of new labels new_label_list = list() # values corresponding labels last_timestep = -1 # variable to store labels label_list = list() # ignored list indices ignore_index_list = list() # total number of steps recorded by tree structure total_number_timesteps = \ matlab_content['tree']['subject'][0][0]['frames'][0][0]['frame'][0][0]['index'][0][-1][0][0] msg = '\tTotal time step size for tree structure:{0}'.format( total_number_timesteps) printout(message=msg, verbose=True) printout(message='\ttraversing data array', verbose=True) total_number_labels = len(data_array) - 1 # READING FILE # loop through each row in the markerExtract file for current_row_number, data in enumerate(data_array): if total_number_timesteps < current_row_number: error_msg = 'mismatch between number of time steps of tree and markerExtract' error_message_func(line=current_row_number, error_message=error_msg, debugging=debugging, logger=temp_log_file_content) # check for label try: # read label and convert it upper case current_label = str(data[0][0]).upper() # if empty row_value except ValueError: error_message_func(line=current_row_number, error_message='missing label', debugging=debugging, logger=temp_log_file_content, label='') except IndexError: error_msg = 'Failed to get label. 
Probably empty cell []' error_message_func(line=current_row_number, error_message=error_msg, debugging=debugging, logger=temp_log_file_content, label='') # switch flags start_flag = not start_flag end_flag = not end_flag continue # check for time step try: # read the time step current_timestep = data[1][0][0].astype(int) # if empty row_value except ValueError: error_message_func( line=current_row_number, error_message='missing time step', label='', debugging=debugging, logger=temp_log_file_content) # switch flags start_flag = not start_flag end_flag = not end_flag continue msg = '\t\tdata cell information: row={0} label={1} timestep={2}'.format( current_row_number + 1, current_label, current_timestep) printout(message=msg, verbose=True) # remove space in the label if ' ' in current_label: error_message_func(line=current_row_number, error_message='extra space', debugging=debugging, logger=temp_log_file_content, label=current_label) # removed space in label current_label = str(current_label).replace(" ", "") if not (right_left_paretic_nonparetic_hand_expectation in current_label) and \ not ('IGNORE_B' in current_label) and not ('IGNORE_E' in current_label): error_msg = 'Expecting label to start with \'' + \ right_left_paretic_nonparetic_hand_expectation + '\'' error_message_func(line=current_row_number, label=current_label, error_message=error_msg, debugging=debugging, logger=temp_log_file_content) tmp_label = list(current_label) tmp_label[ 0: 2] = right_left_paretic_nonparetic_hand_expectation current_label = ("".join(tmp_label)).upper() if right_left_paretic_nonparetic_hand_expectation in current_label: tmp_label = list(current_label) # removes 'R, L, N or P' in the label in order to find the label in the label class current_label = "".join(tmp_label[1:]) # check timestep are increasing if last_timestep > current_timestep and not first_pass: error_msg = 'timestep=' + str(current_timestep + 3) + ' Expected timestep > ' + \ str(last_timestep + 3) 
error_message_func(line=current_row_number, label=current_label, error_message=error_msg, debugging=debugging, logger=temp_log_file_content) # check if wrong '_B' or '_E' label was previously encountered # if wrong label was seeing, then forget expected label and start again if wrong_begin_end_label: if '_B' in current_label: # switch flags start_flag = True end_flag = False elif '_E' in current_label: # switch flags start_flag = False end_flag = True else: error_msg = 'Wrong label suffix' error_message_func(line=current_row_number, label=current_label, error_message=error_msg, debugging=debugging, logger=logger) wrong_begin_end_label = False # start of activity if start_flag: # make sure the 'Begging' label exists if not ('_B' in current_label): if previous_label: error_msg = 'Expecting label ending in \'_B\' since the last label was \'' + \ previous_label + '\'' else: error_msg = 'Expecting label ending in \'_B\'' error_message_func(line=current_row_number, label=current_label, error_message=error_msg, debugging=debugging, logger=logger) wrong_begin_end_label = True else: # reduce the starting position by 3 (model's specifications) current_timestep -= 3 # For a '_B' label, the timestep or value of the label has to increase by one from the # previous timestep/value's label. 
For the error message, we increase it by 2 because # of the requirements of the matlab file (project specific) if last_timestep != current_timestep and not first_pass: error_msg = 'timestep=' + str(current_timestep + 3) + ' Expected timestep >= ' + \ str(last_timestep + 3) error_message_func( line=current_row_number, label=current_label, error_message=error_msg, debugging=debugging, logger=temp_log_file_content) # error check # create a new label in order to compare it to the next label temporary_variable = list(current_label) temporary_variable[-1] = 'E' # store label for future comparison expected_label = ( "".join(temporary_variable)).upper() # '_E' label elif end_flag: # reduce the starting position by 2 (model's specifications) and in order to account # for python not including the last index current_timestep -= 2 # make sure the 'Ending' label exists # compare current label to the expected (obtained from changing the previous/start label if not ('_E' in current_label ) or current_label != expected_label: if previous_label: error_msg = 'Expecting label ending in \'_E\' since the last label was \'' + \ previous_label + '\'' else: error_msg = 'Expecting label ending in \'_E\'' error_message_func( line=current_row_number, label=current_label, error_message=error_msg, debugging=debugging, logger=temp_log_file_content) wrong_begin_end_label = True # need to get the label index based on the '_B' label in the motion_class.label list tmp_var = list(current_label) tmp_var[-1] = 'B' new_label = ("".join(tmp_var)).upper() # check if the motion exists if not (new_label in motion_class.labels): error_msg = 'Unknown label' error_message_func( line=current_row_number, label=current_label, error_message=error_msg, debugging=debugging, logger=temp_log_file_content) # keep record of new labels new_label_list.append(current_label) # print 'Finishing program. Unknown label.' 
# always finish the program # exit(1) if not debugging: if 'IGNORE' in current_label: current_class_label_index = motion_class.labels.index( current_label) else: try: # instead of threading _T_A0A1_E and _T_B2B1_E different, treat them as _T_E # i.e. as a compact label splitted_label = current_label.split( "_")[1] # check if * or non-characters in the label remove_non_characters = re.compile( '[^a-zA-Z]') splitted_label = remove_non_characters.sub( '', splitted_label) # get index of compact label current_class_label_index = motion_class.compact_list.index( splitted_label) except ValueError: msg = 'Error: label was not found in the compact label list'.format( current_label) error_message_func( line=current_row_number, error_message=msg, debugging=debugging, label=current_label, logger=temp_log_file_content) # provide the same labels to multiple time steps for hmm algorithm label_range = current_timestep - last_timestep label_list.extend([current_class_label_index] * label_range) else: error_msg = 'Error while changing start and end checks' error_message_func(line=current_row_number, error_message=error_msg, debugging=debugging, label='', logger=temp_log_file_content) if first_pass: if current_timestep != 0: error_msg = 'Time step does not start at 3' error_message_func( line=current_row_number, label=current_label, error_message=error_msg, debugging=debugging, logger=temp_log_file_content) # store label for future comparison previous_label = current_label # switch flags start_flag = not start_flag end_flag = not end_flag first_pass = False # check for multiple motions per label label_used = [ current_label for pMotions in motion_class.possible_motions if (pMotions in current_label) ] if len(label_used) > 1: error_msg = 'Error: More than one motion in the same label' error_message_func(line=current_row_number, label=current_label, error_message=error_msg, debugging=debugging, logger=temp_log_file_content) # check for ignored labels/timesteps if 'IGNORE_E' in 
current_label: ignore_index_list.append( [last_timestep, current_timestep]) last_timestep = current_timestep if total_number_labels == current_row_number: # check last label ended in a '_E' label if '_E' not in current_label: error_msg = 'last label in markerextract is not a \'_E\' label' error_message_func( line=current_row_number, error_message=error_msg, debugging=debugging, logger=temp_log_file_content) # check size of labels and size dataset if (total_number_timesteps + 1) != last_timestep: error_msg = 'mismatch between number of time steps of tree ({0}) and labels in the ' \ 'markerExtract ({1})'.format(total_number_timesteps + 1, last_timestep) error_message_func( error_message=error_msg, debugging=debugging, logger=temp_log_file_content) if not debugging and extract and tree2: label_array = np.array(label_list) msg = '\tObtaining sensor data from file {0}'.format( matlab_file_name) printout(message=msg, verbose=True) # extracting sensors' data extract_data_and_save_to_file( labels_array=label_array, ignored_indices=ignore_index_list, dataset=matlab_content['tree2'], motion_class=motion_class, current_file_name=matlab_file_name, outfile_object=out_file) msg = '\tFinished storing sensor data for file {0}'.format( matlab_file_name) printout(message=msg, verbose=True) if len(temp_log_file_content) != 0: if first_pass_ever: file_properties.output_file_object.write( 'List of errors: \n\n') first_pass_ever = False if first_pass_activity: # print activity with error file_properties.output_file_object.write( 'activity: {0}\n\n'.format(activity)) first_pass_activity = False # print the filename file_properties.output_file_object.write(matlab_file_name) file_properties.output_file_object.write('\n') # loop through the errors and print them for log_line in temp_log_file_content: file_properties.output_file_object.write(log_line) file_properties.output_file_object.write('\n') try: if len(new_label_list) != 0: file_properties.output_file_object.write( '\tSet of new 
labels: \n\n') for new_labels in new_label_list: file_properties.output_file_object.write( '\t' + new_labels + '\n') file_properties.output_file_object.write('\n') # new_label_list was not defined except NameError: continue if not extract: file_properties.output_file_object.write( 'Done checking matlab files\n')
def _ask_until_valid(prompt, choices, error_message):
    """
    keep asking the same question on the console until the answer is accepted
    :param prompt: (str) text shown to the user by raw_input
    :param choices: (dict) accepted answers (upper case) mapped to the value to return
    :param error_message: (str) text sent to printout after an answer that is not accepted
    :return: the value stored in choices for the accepted answer
    """
    while True:
        # ignore case and surrounding blanks so that ' l ' and 'L' are the same answer
        answer = raw_input(prompt).strip().upper()
        if answer in choices:
            return choices[answer]
        printout(message=error_message, verbose=True)


def forwarding_filters():
    """
    obtain interactively the filters used to select the files:
        right or left arm
        paretic, nonparetic (experimental group) or nothing (control group)
        active or nonactive (only asked for the experimental group)
    every question is repeated until a valid answer is entered. Answers are case
    insensitive and the spelled-out options are accepted with or without the
    hyphen shown in the prompt (e.g. 'non-paretic' and 'nonparetic').
    :return: all the filters: leftright_arm ('_l_' or '_r_'), specific_side
            ('' when neither paretic nor nonparetic was selected, otherwise
            '_<paretic|nonparetic>_<active|nonactive>_')
    """
    wrong_option = 'Wrong option selected.'

    leftright_arm = _ask_until_valid(
        prompt='right(r) or left(l): ',
        choices={'L': '_l_', 'LEFT': '_l_',
                 'R': '_r_', 'RIGHT': '_r_'},
        error_message='No side specified. Please specified a side.')

    # an empty answer ('enter') is a valid choice here: it means no paretic filter
    group = _ask_until_valid(
        prompt="paretic(p), non-paretic(n) or neither('enter'): ",
        choices={'P': 'paretic', 'PARETIC': 'paretic',
                 'N': 'nonparetic', 'NONPARETIC': 'nonparetic',
                 'NON-PARETIC': 'nonparetic',
                 '': ''},
        error_message=wrong_option)

    if not group:
        # no paretic/nonparetic selection, so the active/nonactive question is skipped
        return leftright_arm, ''

    # the wording of the question differs between the two groups; keep both as they were
    activity_prompts = {'paretic': 'active(a) or non-active(n): ',
                        'nonparetic': 'active or non-active: '}
    activity = _ask_until_valid(
        prompt=activity_prompts[group],
        choices={'A': 'active', 'ACTIVE': 'active',
                 'N': 'nonactive', 'NONACTIVE': 'nonactive',
                 'NON-ACTIVE': 'nonactive'},
        error_message=wrong_option)

    specific_side = '_{0}_{1}_'.format(group, activity)
    return leftright_arm, specific_side
print '9: Exit' print '' try: selected_option = int(raw_input('Select an option: ')) if selected_option == 1: ml_algorithm('GHMM') elif selected_option == 2: ml_algorithm('GMMHMM') elif selected_option == 3: check_matlab() elif selected_option == 4: convert_featurize_matlab('extract') elif selected_option == 5: convert_featurize_matlab('featurize') elif selected_option == 6: move_matlab() elif selected_option == 7: ml_algorithm('Logistic Regression') elif selected_option == 8: ml_algorithm('LSTM') elif selected_option == 9: exit_program() else: printout(message='Wrong option selected.', verbose=True) except ValueError as error_message: logging.getLogger('regular').error(error_message) raise ValueError(error_message)