Esempio n. 1
0
def logreg_algo(trainingdataset='', traininglabels='', testingdataset='', testinglabels='', logger='', quickrun=True):
    """
    Train Logistic Regression model(s) and hand every fitted model to results().

    With quickrun a single model with the estimator's default parameters is trained. Otherwise one model is
    trained for every combination of tolerance and inverse regularization strength of a small fixed grid.

    :param trainingdataset: training samples passed to LogisticRegression.fit
    :param traininglabels: training labels; must support slicing and ravel()
    :param testingdataset: testing samples, forwarded to results()
    :param testinglabels: testing labels, forwarded to results()
    :param logger: object exposing getLogger(name) (used like the logging module)
    :param quickrun: (bool) train one default model instead of the parameter grid
    """
    max_iterations = 200
    n_jobs = -1

    # both branches fit on a flat label vector (the grid branch used to pass the labels un-raveled)
    flat_labels = traininglabels[:].ravel()

    if quickrun:
        logger.getLogger('tab.regular.time').info('starting training Logistic Regression Model.')
        logreg_model = LogisticRegression(n_jobs=n_jobs, verbose=True)
        logreg_model = logreg_model.fit(trainingdataset, flat_labels)

        results(logreg_model=logreg_model, trainingdataset=trainingdataset, traininglabels=traininglabels,
                testingdataset=testingdataset, testinglabels=testinglabels, logger=logger)
        return

    for t in [0.0001, 0.00001, 0.000001, 0.0000001]:
        for c in [10, 100]:
            logger.getLogger('tab.regular.time').info('starting training Logistic Regression Model.')
            msg = '\tmodel parameters: \n ' \
                  '\t\tInverse of regularization strength:{0}' \
                  '\t\ttolerance:{1}' \
                  '\t\tmax iterations:{2}' \
                  '\t\tnumber of jobs:{3}'.format(c, t, max_iterations, n_jobs)
            printout(message=msg, verbose=True)
            logreg_model = LogisticRegression(C=c, tol=t, max_iter=max_iterations, n_jobs=n_jobs, verbose=True)
            logreg_model = logreg_model.fit(trainingdataset[:], flat_labels)

            # forward the logger exactly like the quickrun branch does
            results(logreg_model=logreg_model, trainingdataset=trainingdataset, traininglabels=traininglabels,
                    testingdataset=testingdataset, testinglabels=testinglabels, logger=logger)
Esempio n. 2
0
def window_step_properties():
    """
    Ask for the window size and the step size used by the featurization step.

    The window size comes from window_property(). The step size prompt is repeated until a whole number
    greater than 0 is entered; an empty answer selects the default step size of 1.

    :return: (list) [window size, step size]
    """
    window_size = window_property()

    while True:
        answer = raw_input('step size: ').upper()

        # empty answer: fall back to the default
        if answer == '':
            answer = 1

        try:
            step_size = int(answer)
        except ValueError as err_message:
            printout(message=err_message)
            continue

        if step_size > 0:
            break

        printout('Wrong number of step size. Select step size greater than 0')

    return [window_size, step_size]
Esempio n. 3
0
def select_dataset_quickrun(algorithm=''):
    """
    Ask for the input dataset, the quickrun option and, for the HMM algorithms, the extra HMM settings.

    :param algorithm: (str) name of the selected algorithm; 'GHMM' and 'GMMHMM' trigger the extra prompts
    :return:
        if GHMM or GMMHMM: file path, quickrun option, kmeans option, batch option, window size, number of states
        else: file path, quickrun option
    :raises ValueError: if the number of HMM states is not a whole number or is not one of 8, 10 or 20
    """
    # get the right dataset location
    file_path = get_set_dataset_location(matlab_or_dataset='dataset', default_folder='processed_dataset', action='get')

    quickrun = ''
    while quickrun == '':

        # quickrun runs only the short version of the selected algorithm
        # rather than the algorithm with multiple parameters
        quickrun_selection = raw_input('Quickrun: ').upper()
        print('')

        # an empty answer defaults to a quick run
        if quickrun_selection in ('', 'TRUE', 'T'):
            quickrun = True
        elif quickrun_selection in ('FALSE', 'F'):
            quickrun = False
        else:
            msg = 'Error. Wrong option for quickrun selected.'
            printout(message=msg, verbose=True)

    if algorithm != 'GHMM' and algorithm != 'GMMHMM':
        return file_path, quickrun

    kmeans = raw_input('kmeans (regular or mini): ').upper()
    if kmeans == '':
        kmeans = 'REGULAR'

    # anything other than an empty answer or 'FALSE' enables the batch setting
    batch_answer = raw_input('batch setting: ').upper()
    batch = not (batch_answer == '' or batch_answer == 'FALSE')

    window_size = window_property()

    # the empty-answer default has to be checked BEFORE converting to int:
    # int('') raises ValueError, which made the default of 8 unreachable
    n_states_answer = raw_input('Number of HMM states: ').strip()
    if n_states_answer == '':
        n_states = 8
    else:
        n_states = int(n_states_answer)
        if n_states not in (8, 10, 20):
            raise ValueError('Wrong state number')

    return file_path, quickrun, kmeans, batch, window_size, n_states
Esempio n. 4
0
    def run(self):
        """
        Pre-process self.dataset and store the outcome on the instance.

        Announces the start and the end of the work through printout() with a timestamp, stores the two
        values returned by hmm_preprocessing_data() in self.dataset_normalized and self.labels, and finally
        sets self.thread_done to True.
        """
        stamp_format = '%Y-%m-%d %H:%M:%S'

        started_at = datetime.now().strftime(stamp_format)
        start_message = 'Pre-processing {0}. Data:{1}'.format(self.name, started_at)
        printout(message=start_message, verbose=True)

        self.dataset_normalized, self.labels = hmm_preprocessing_data(dataset=self.dataset)

        finished_at = datetime.now().strftime(stamp_format)
        end_message = 'Finished pre-processing {0}. Data:{1}'.format(self.name, finished_at)
        printout(message=end_message, verbose=True)

        # lets whoever polls this flag know the work is complete
        self.thread_done = True
Esempio n. 5
0
def load_data(data_dir):
    """
    Load every array file found in data_dir and stack them row-wise into one array.

    The arrays are collected first and concatenated once at the end; appending inside the loop would
    re-copy everything loaded so far for each new file.

    :param data_dir: directory whose entries are all loaded with np.load
    :return: (stacked array, list of UserInfo); every UserInfo holds the user, the activity and the
        start/end row index (end exclusive) of that file's rows inside the stacked array.
        An empty directory yields an array of shape (0, 0) and an empty list.
    """
    # data files are stored within the project dataset folder
    dataset_files = os.listdir(data_dir)

    # arrays waiting to be concatenated, and the number of rows they hold
    loaded_arrays = []
    total_rows = 0

    # dataset user information
    dataset_user_information = list()

    # loop through every file
    for python_file in dataset_files:
        python_file_path = os.path.join(data_dir, python_file)
        print('reading file: {0}'.format(python_file))
        # getting file information
        user, activity = file_information(python_file)
        start_index = total_rows

        # read data from file
        raw_data = np.load(python_file_path)
        if start_index == 0:
            # no rows collected so far: this file becomes the start of the stacked array
            loaded_arrays = [raw_data]
        else:
            loaded_arrays.append(raw_data)

        total_rows = start_index + np.shape(raw_data)[0]
        end_index = total_rows

        user_prop = UserInfo(user=user,
                             activity=activity,
                             start_index=start_index,
                             end_index=end_index)

        dataset_user_information.append(user_prop)

        print('\tuser={0} activity={1}, start/end index={2}'.format(
            user, activity, (start_index, end_index)))

        printout(message='\tdata stored in dataframe\n', verbose=True)

    if not loaded_arrays:
        sensordata_array = np.empty(shape=(0, 0))
    elif len(loaded_arrays) == 1:
        sensordata_array = loaded_arrays[0].copy()
    else:
        sensordata_array = np.concatenate(loaded_arrays, axis=0)

    return sensordata_array, dataset_user_information
Esempio n. 6
0
def load_data(data_dir):
    """
    Load every array file found in data_dir and stack them row-wise into one DataFrame.

    The per-file frames are collected and concatenated once with pd.concat: DataFrame.append copied the
    whole frame on every call and no longer exists in pandas 2.0.

    :param data_dir: directory whose entries are all loaded with np.load
    :return: (DataFrame, list of UserInfo); the DataFrame index and columns are renumbered from 0 and every
        UserInfo holds the user, the activity and the start/end row index (end exclusive) of that file's rows.
        An empty directory yields an empty DataFrame and an empty list.
    """
    # data files are stored within the project dataset folder
    dataset_files = os.listdir(data_dir)

    # per-file frames waiting to be concatenated, and the number of rows they hold
    frames = []
    total_rows = 0

    # dataset user information
    dataset_user_information = list()

    # loop through every file
    for python_file in dataset_files:
        python_file_path = os.path.join(data_dir, python_file)
        msg = 'reading file: {0}'.format(python_file)
        printout(message=msg, verbose=True)
        # getting file information
        user, activity = file_information(python_file)
        start_index = total_rows

        # read data from file and convert the array to a dataframe
        raw_data = np.load(python_file_path)
        df_data = pd.DataFrame(raw_data)
        frames.append(df_data)

        total_rows += df_data.shape[0]
        end_index = total_rows

        user_prop = UserInfo(user=user,
                             activity=activity,
                             start_index=start_index,
                             end_index=end_index)

        dataset_user_information.append(user_prop)

        print('\tuser={0} activity={1}, start/end index={2}'.format(
            user, activity, (start_index, end_index)))

        printout(message='\tdata stored in dataframe\n', verbose=True)

    # pd.concat refuses an empty list, so the no-file case keeps the empty frame
    if frames:
        sensordata_dataframe = pd.concat(frames)
    else:
        sensordata_dataframe = pd.DataFrame()

    # renumber rows and columns, since every per-file frame starts counting from 0
    sensordata_dataframe.index = range(0, sensordata_dataframe.shape[0])
    sensordata_dataframe.columns = range(0, sensordata_dataframe.shape[1])

    return sensordata_dataframe, dataset_user_information
Esempio n. 7
0
def move_matlab():
    """
    Move and organize the matlab files into a hierarchical folder (activity as parent, then users).

    Asks for the source directory through get_set_dataset_location() and then for the forwarding
    (destination) directory until a usable one is obtained:
        - an empty answer selects the 'sensordata' folder under program_path
        - an existing directory is used as given
        - anything else is resolved against program_path and created if it does not exist
    """

    # default option is the dropbox directory
    initial_path = get_set_dataset_location(matlab_or_dataset='matlab',
                                            default_folder='/Users/jguerra/Dropbox/SensorJorge')

    while True:

        forwarding_path = raw_input('Matlab forwarding directory: ')
        print(forwarding_path)

        if forwarding_path == "":
            # default: folder next to the program
            final_path = os.path.join(program_path, 'sensordata')
            print(final_path)
        elif os.path.isdir(forwarding_path):
            final_path = forwarding_path
        else:
            # not an existing directory: resolve against the program location (os.path.join keeps an
            # absolute forwarding_path untouched). Previously final_path stayed unset or stale here.
            final_path = os.path.join(program_path, forwarding_path)

        try:
            # if does not exists, create it
            if not os.path.exists(final_path):
                os.makedirs(final_path)
        except OSError:
            msg = 'Error. {0} directory cannot be created.'.format(final_path)
            printout(message=msg, verbose=True)
            msg = 'Please chose a different forwarding directory.'
            printout(message=msg, verbose=True)
        else:
            break

    msg = 'starting moving matlab files from {0} to {1}'.format(initial_path, final_path)
    logging.getLogger('').info(msg)

    # move to the resolved directory; the raw answer is empty when the default was chosen
    move_matlab_files(initial_path, final_path)
    msg = 'finished moving matlab files from {0} to {1}'.format(initial_path, final_path)
    logging.getLogger('').info(msg)
Esempio n. 8
0
def get_set_dataset_location(matlab_or_dataset, default_folder='', action=''):
    """
    Ask for the file name of a dataset or for the directory of the matlab files.

    The prompt is repeated until a usable answer is given.

    :param matlab_or_dataset: (str) 'matlab' asks for a directory, anything else asks for a dataset file name
    :param default_folder: (str) for 'matlab': directory used when the answer is empty;
        for datasets: folder (relative to program_path) in which the dataset file is placed
    :param action: (str) 'get' prompts for an input dataset, anything else for an output dataset
    :return:
        if dataset: path of '<filename>.hdf5' inside default_folder, under program_path
        if matlab: the existing directory that was selected
    """
    while True:

        if matlab_or_dataset == 'matlab':
            file_directory = raw_input('Matlab directory: ')

            # an empty answer selects the caller-supplied default directory, when there is one
            if file_directory == '' and default_folder:
                file_directory = default_folder

            # isdir() is already False for paths that do not exist
            if os.path.isdir(file_directory):
                return file_directory

            msg = 'Error. Wrong directory provided. Please, provide correct directory.'
            printout(message=msg, verbose=True)
            continue

        if action == 'get':
            filename = raw_input('Input dataset filename: ')
        else:
            filename = raw_input('Output dataset filename: ')

        if filename == "":
            msg = 'Error. No filename provided. Please, insert filename.'
            printout(message=msg, verbose=True)
            continue

        # os.path.join skips an empty default_folder, so the file then lands directly under program_path
        # (without this the loop never terminated when no default folder was given)
        folder_filename = os.path.join(default_folder, filename + '.hdf5')
        return os.path.join(program_path, folder_filename)
Esempio n. 9
0
def window_property():
    """
    Ask for the window size until a supported value is entered.

    Supported values are 30, 60 and 120; an empty answer selects the default of 60.

    :return: (int) window size
    """
    valid_sizes = (30, 60, 120)

    while True:
        answer = raw_input('window size: ')

        # empty answer: fall back to the default
        if answer == '':
            answer = 60

        try:
            window_size = int(answer)
        except ValueError as err_message:
            printout(message=err_message)
            continue

        if window_size in valid_sizes:
            return window_size

        printout('Wrong number of window size. Options:30,60 or 120')
Esempio n. 10
0
def _add_training_datasets(h5_file_object, base_object, user, activity, logger):
    """Add to base_object every dataset usable as training data for the given test user and activity."""
    # total number of users being considered
    total_inner_users = len(h5_file_object) - 1

    for u_index, user_info_inner in enumerate(h5_file_object):

        # get the attributes of the training example
        inner_attributes = h5_file_object[user_info_inner].attrs
        inner_user = inner_attributes['user']
        inner_activity = inner_attributes['activity']

        # training data: same activity performed by another user, excluding datasets whose name
        # contains 'paretic' or 'feeding'
        adding = ((inner_user != user)
                  and ('paretic' not in user_info_inner)
                  and ('feeding' not in user_info_inner)
                  and (activity == inner_activity))

        # string to print for logging information
        print_str = '{0} (user index {1} of {2})'.format(
            user_info_inner, u_index, total_inner_users)

        if adding:
            base_object.add_dataset(dataset=h5_file_object[user_info_inner])
            msg = 'Including {0}'.format(print_str)
        else:
            msg = 'Excluding {0}'.format(print_str)

        logger.getLogger('tab.regular').info(msg)


def imu_algorithm(doc,
                  algorithm='',
                  quickrun='',
                  logger='',
                  kmeans='',
                  window_size='',
                  n_states='',
                  batched_setting=False):
    """
    Run the selected algorithm once for every 'pilot' dataset of the input HDF5 file.

    Every dataset whose name contains 'pilot' and does not contain 'feeding' is used in turn as the testing
    dataset; the datasets of the other users performing the same activity are added as training data.

    :param doc: object exposing input_path_filename (HDF5 file to read) and input_path
    :param algorithm: (str) 'GHMM' or 'GMMHMM'; any other value only prints an error message
    :param quickrun: forwarded to hmm_algo
    :param logger: object exposing getLogger(name) (used like the logging module)
    :param kmeans: forwarded to hmm_algo
    :param window_size: forwarded to base.Base
    :param n_states: forwarded to base.Base and hmm_algo
    :param batched_setting: forwarded to hmm_algo
    """
    # NOTE(review): never read below; kept in case constructing MatlabLabels has side effects
    label_object = MatlabLabels()

    h5_file_object = h5py.File(doc.input_path_filename, 'r')

    try:
        # printing a line for style and visibility
        printout(message='', verbose=True)

        # iterating the file yields the dataset names
        for user_info in h5_file_object:

            # only consider control users that are not performing feeding activity
            if ('pilot' not in user_info) or ('feeding' in user_info):
                continue

            # get user and activity
            user = h5_file_object[user_info].attrs['user']
            activity = h5_file_object[user_info].attrs['activity']

            # initialize user object
            base_object = base.Base(input_path=doc.input_path,
                                    filename=user_info,
                                    user=user,
                                    activity=activity,
                                    dataset=h5_file_object[user_info],
                                    window_size=window_size,
                                    n_states=n_states)

            try:
                msg = 'Starting analysing {0}'.format(user_info)
                logger.getLogger('regular.time').info(msg)

                msg = 'Calculating training and testing dataset'
                logger.getLogger('regular.time').info(msg)

                _add_training_datasets(h5_file_object=h5_file_object,
                                       base_object=base_object,
                                       user=user,
                                       activity=activity,
                                       logger=logger)

                # logging information
                for dataset_type in ['Training', 'Testing']:
                    data_size, label_size = base_object.get_shape(dataset_type)
                    msg = '{0} data size:{1}'.format(dataset_type, data_size)
                    logger.getLogger('line.tab.regular').info(msg)

                    # {1}: the label size (the message used to repeat the dataset type instead)
                    msg = '{0} labels size:{1}'.format(dataset_type, label_size)
                    logger.getLogger('tab.regular').info(msg)

                logger.getLogger('tab.regular').info('')

                try:
                    # only the HMM algorithms are wired in; Logistic Regression and LSTM are disabled
                    if algorithm == 'GHMM' or algorithm == 'GMMHMM':
                        hmm_algo(base_object=base_object,
                                 algorithm=algorithm,
                                 batched_setting=batched_setting,
                                 logger=logger,
                                 kmeans=kmeans,
                                 quickrun=quickrun,
                                 n_states=n_states)
                    else:
                        printout(message='Wrong algorithm provided.', verbose=True)

                    msg = 'Finished analysing {0}'.format(user_info)
                    logger.getLogger('tab.regular.time.line').info(msg)

                except ValueError as error_message:
                    # a failing user is logged and skipped so the remaining users still run
                    msg = 'Error while analysing {0}'.format(user_info)
                    logger.getLogger('tab.regular.time').error(msg)
                    logger.getLogger('tab.regular.time.line').error(error_message)

            finally:
                # closing and deleting h5py file, also when the analysis fails unexpectedly
                base_object.close_and_delete()

    finally:
        # release the input file no matter how the loop ends
        h5_file_object.close()
Esempio n. 11
0
def extract_data_and_save_to_file(labels_array='',
                                  ignored_indices='',
                                  dataset='',
                                  motion_class='',
                                  dataset_path='',
                                  current_file_name=''):
    """
    Build the data/label array of one matlab file and save it as a numpy file in dataset_path.

    The sensor data of every vector in motion_class.vectorsUsed is stacked column-wise ('joint' vectors are
    read per joint in motion_class.jointUsed, every other vector per segment in motion_class.segmentUsed),
    the labels are appended as extra column(s), rows listed in ignored_indices are removed through
    remove_ignores() and one last column carrying the number of rows in its first entry is added. Files
    already present in dataset_path whose name contains the same user and activity are loaded and placed
    in front of the new rows before saving.

    :param labels_array: labels, one row per data row
    :param ignored_indices: indices handed to remove_ignores(); nothing is removed when empty
    :param dataset: loaded matlab content, indexed by vector name
    :param motion_class: object exposing vectorsUsed, jointUsed and segmentUsed
    :param dataset_path: directory holding the processed files
    :param current_file_name: name of the matlab file, parsed by file_information()
    :raises ValueError: if a sensor block is not 2-dimensional, blocks have different row counts or no
        sensor data was selected
    :raises SystemExit: if data and labels cannot be merged
    """
    # collect every sensor block, then stack them side by side in one go
    sensor_blocks = []
    for vector in motion_class.vectorsUsed:
        v_data = dataset[vector]
        if 'joint' == vector:
            sources = motion_class.jointUsed
        else:
            sources = motion_class.segmentUsed

        for source in sources:
            # NOTE(review): the first two rows of every block are skipped - confirm what they hold
            sensor_data = v_data[0][0][source][0][0][2:]
            if np.ndim(sensor_data) != 2:
                raise ValueError('sensor data of {0}/{1} is not 2-dimensional'.format(vector, source))
            sensor_blocks.append(sensor_data)

    if not sensor_blocks:
        raise ValueError('no sensor data selected for {0}'.format(current_file_name))
    data = np.hstack(sensor_blocks)

    # merge data with their respective labels
    try:
        printout(message='\tMerging data and labels arrays', verbose=True)
        tmp_arr = np.c_[data, labels_array]

    except ValueError:
        msg = '\tsize of data: {0}'.format(np.shape(data))
        printout(message=msg, verbose=True)
        msg = '\tsize of labels: {0}'.format(np.shape(labels_array))
        printout(message=msg, verbose=True, extraspaces=2)
        raise SystemExit(1)

    if len(ignored_indices) != 0:
        printout(message='\tRemoving \'Ignored\' labels', verbose=True)
        data_labels = remove_ignores(tmp_arr, ignored_indices)
    else:
        data_labels = tmp_arr

    # this information will be used to train the hmm since its important to know the start and end of
    # an activity in order for the EM algo to not learn from non-concurrent activities
    n_datapoints = np.shape(data_labels)[0]
    array_length = np.zeros([n_datapoints, 1])
    array_length[0, 0] = n_datapoints
    output_array = np.c_[data_labels, array_length]

    # current user and activity based on the file name
    user, activity, leftright = file_information(current_file_name)

    # files already processed for the same user and activity
    users_processed = [
        pfile for pfile in listdir(dataset_path)
        if isfile(join(dataset_path, pfile)) and user in pfile and activity in pfile
    ]

    # concatenate users performing the same activities
    for uprocessed in users_processed:
        # listdir() returns bare names, so the directory has to be put back before loading
        old_data = np.load(join(dataset_path, uprocessed))
        output_array = np.concatenate((old_data, output_array), axis=0)

    new_file_name = user + '_' + leftright + '_' + activity
    current_out_path = os.path.join(dataset_path, new_file_name)

    msg = '\tOutput file directory: {0}'.format(current_out_path)
    printout(message=msg, verbose=True)
    np.save(current_out_path, output_array)
Esempio n. 12
0
def extract_mat_information(doc,
                            matlab_directory,
                            action,
                            leftright_arm,
                            script_path,
                            output_path_filename='',
                            pareticnonparetic=''):
    """
    Extracts the relevant information about the directories of the matlab files being considered
    updates two variables:
        doc.matlab_files_names_dict[activity]: key is the activity and the value is the list of matlab file names
        doc.matlab_files_path_dict[activity]: key is the activity and the value is the list of matlab file paths
    doc.count is increased by one for every file that is added.

    A non-hidden '.mat' file is added when
        - leftright_arm is empty (all files), or
        - leftright_arm is part of the file name, or
        - pareticnonparetic is part of the file name and the user id at the start of the file name is listed
          in MatlabLabels().s_patients_dexterity[leftright_arm]

    :param doc: object receiving input_path, activity_list and (for 'extract') output_path/output_path_filename
    :param matlab_directory: directory with one sub-folder per activity; empty selects script_path/SensorData
    :param action: (str) 'extract' also prepares the 'converted_dataset' output folder under script_path
    :param leftright_arm: (str) arm filter, see above
    :param script_path: working path of the program
    :param output_path_filename: stored on doc when action is 'extract'
    :param pareticnonparetic: (str) paretic filter used for experimental patients, see above
    :raises SystemExit: if the input directory does not exist
    """

    working_path = script_path
    # current working path
    if matlab_directory == "":
        # including folder SensorData where all the matlab files are located
        doc.input_path = os.path.join(working_path, 'SensorData')
    else:
        doc.input_path = matlab_directory

    # check input path; report the path that was actually checked
    if not os.path.exists(doc.input_path):
        msg = "File " + doc.input_path + " does not exist"
        printout(message=msg, verbose=True)
        raise SystemExit(1)

    if action == 'extract':
        doc.output_path = os.path.join(working_path, 'converted_dataset')

        if not os.path.exists(doc.output_path):
            os.makedirs(doc.output_path)

        doc.output_path_filename = output_path_filename

    # all the activities i.e. Shelf_High_Heavycan, Shelf_Low_Heavycan, etc...
    doc.activity_list = next(os.walk(doc.input_path))[1]

    # used for experimental patients
    specific_patients = ()
    user_regex = None
    if pareticnonparetic:
        # set of motions and labels
        motion_class = MatlabLabels()

        # get list of right or left dexterity user
        specific_patients = motion_class.s_patients_dexterity[leftright_arm]

        # group 1: user id, group 2: activity. The paretic/active parts are real alternations;
        # square brackets would only define a set of allowed characters.
        user_regex = re.compile(
            r'(^[A-Z]+[0-9]+)_(?:nonparetic|paretic)_(?:active|nonactive)_([a-z]+_high|[a-z]+_low|[a-z]+).*\.mat$')

    # loop through activities
    for activity in doc.activity_list:
        matlab_path_list = list()
        matlab_files_list = list()
        # obtain the path based on that activity
        activity_path = os.path.join(doc.input_path, activity)

        # loop through each subject of the current activity
        for (subject_path, _, matlab_file_list) in os.walk(activity_path,
                                                           topdown=False):
            for matlab_file in matlab_file_list:
                # only matlab files that are not hidden files
                if not matlab_file.endswith('.mat') or matlab_file.startswith('.'):
                    continue

                if leftright_arm == "" or leftright_arm in matlab_file:
                    # checking files (no filter) or extracting control patients files
                    add_file = True
                elif pareticnonparetic and pareticnonparetic in matlab_file:
                    # extracting experimental patients files: the user has to be in the
                    # right or left side list; names not following the pattern are skipped
                    user_information = user_regex.match(matlab_file)
                    add_file = (user_information is not None
                                and user_information.group(1) in specific_patients)
                else:
                    add_file = False

                if add_file:
                    # full matlab path
                    matlab_path_list.append(
                        os.path.join(subject_path, matlab_file))
                    matlab_files_list.append(matlab_file)
                    doc.count += 1

        # add the respective matlab files to their specific activities
        doc.matlab_files_path_dict[activity] = matlab_path_list
        doc.matlab_files_names_dict[activity] = matlab_files_list
Esempio n. 13
0
def extract_data_and_save_to_file(labels_array='',
                                  ignored_indices='',
                                  dataset='',
                                  motion_class='',
                                  outfile_object='',
                                  current_file_name=''):
    """
    Build the data/label array of one matlab file and store it as a dataset of outfile_object.

    The sensor data of every vector in motion_class.vectorsUsed is stacked column-wise ('joint' vectors are
    read per joint in motion_class.jointUsed, every other vector per segment in motion_class.segmentUsed),
    the labels are appended as extra column(s) and rows listed in ignored_indices are removed through
    remove_ignores(). The result is stored under the name current_file_name and add_attributes() is called
    on the new dataset.

    :param labels_array: labels, one row per data row
    :param ignored_indices: indices handed to remove_ignores(); nothing is removed when empty
    :param dataset: loaded matlab content, indexed by vector name
    :param motion_class: object exposing vectorsUsed, jointUsed and segmentUsed
    :param outfile_object: output container exposing create_dataset() and item access by name
    :param current_file_name: name of the dataset to create
    :raises ValueError: if a sensor block is not 2-dimensional, blocks have different row counts or no
        sensor data was selected
    :raises SystemExit: if data and labels cannot be merged
    """
    # collect every sensor block, then stack them side by side in one go
    sensor_blocks = []
    for vector in motion_class.vectorsUsed:
        v_data = dataset[vector]
        if 'joint' == vector:
            sources = motion_class.jointUsed
        else:
            sources = motion_class.segmentUsed

        for source in sources:
            # NOTE(review): the first two rows of every block are skipped - confirm what they hold
            sensor_data = v_data[0][0][source][0][0][2:]
            if np.ndim(sensor_data) != 2:
                raise ValueError('sensor data of {0}/{1} is not 2-dimensional'.format(vector, source))
            sensor_blocks.append(sensor_data)

    if not sensor_blocks:
        raise ValueError('no sensor data selected for {0}'.format(current_file_name))
    data = np.hstack(sensor_blocks)

    # merge data with their respective labels
    try:
        printout(message='\tMerging data and labels arrays', verbose=True)
        tmp_arr = np.c_[data, labels_array]

    except ValueError:
        msg = '\tsize of data: {0}'.format(np.shape(data))
        printout(message=msg, verbose=True)
        msg = '\tsize of labels: {0}'.format(np.shape(labels_array))
        printout(message=msg, verbose=True, extraspaces=2)
        raise SystemExit(1)

    if len(ignored_indices) != 0:
        printout(message='\tRemoving \'Ignored\' labels', verbose=True)
        data_labels = remove_ignores(tmp_arr, ignored_indices)
    else:
        data_labels = tmp_arr

    outfile_object.create_dataset(name=current_file_name, data=data_labels)

    add_attributes(outfile_object[current_file_name], current_file_name)
Esempio n. 14
0
def data_collection(file_properties, debugging, extract, logger):
    """
    validate the labels stored in every matlab file and, optionally, extract the sensor data

    For each activity in file_properties.matlab_files_path_dict, every matlab file is loaded with
    sio.loadmat and checked for the 'tree', 'tree2' and 'markerExtract' structures. The rows of
    'markerExtract' are then traversed and checked: hand/side prefix of the label (derived from the
    file name), alternating '_B'/'_E' suffixes, increasing time steps, known motion labels and the
    total number of time steps. Problems found are reported through error_message_func and, when any
    were accumulated for a file, written to file_properties.output_file_object.

    When debugging is false, extract is true and 'tree2' exists, the per-timestep label array is
    handed to extract_data_and_save_to_file together with the 'tree2' content and an h5py file
    opened (mode 'w') at file_properties.output_path_filename.

    :param file_properties: object providing matlab_files_path_dict, matlab_files_names_dict,
                            output_path_filename and output_file_object
    :param debugging: when true, fatal checks do not raise and no labels/data are collected
    :param extract: when true, the output h5py file is created and sensor data is stored
    :param logger: object exposing getLogger(name) (used as logger.getLogger('regular')); presumably
                   the logging module - TODO confirm against the caller
    :return: None
    :raises ValueError: when not debugging and either the expected file name is not part of the
                        file path or the 'markerExtract' structure is missing
    """

    # set of motions and labels
    motion_class = MatlabLabels()

    # flags used for headers in the log file
    first_pass_ever = True

    # NOTE(review): out_file is only bound when extract is true and it is never closed inside this
    # function - confirm whether the caller or interpreter shutdown is relied upon to flush it
    if extract:
        out_file = h5py.File(file_properties.output_path_filename, 'w')

    # loop through each activity
    for activity, matlab_file_list in file_properties.matlab_files_path_dict.iteritems(
    ):

        # used for logging information
        first_pass_activity = True

        # loop through all the matlab files
        for index_matlab_file, matlab_file in enumerate(matlab_file_list):

            # get name of matlab file
            matlab_file_name = file_properties.matlab_files_names_dict[
                activity][index_matlab_file]
            msg = 'On activity: {0}'.format(activity)
            logger.getLogger('regular').info(msg)
            msg = 'Accessing file: {0}'.format(matlab_file_name)
            logger.getLogger('regular').info(msg)
            # the name and the path lists are expected to be aligned by index
            if matlab_file_name not in matlab_file:
                error_msg = 'Fatal Error. missing file={0} for activity={1}'.format(
                    matlab_file_name, activity)
                logger.getLogger('regular').error(error_msg)
                if not debugging:
                    raise ValueError(error_msg)

            # keep track of errors within specific files
            # (passed to error_message_func as its 'logger' argument; presumably the helper
            # appends one line per error to this list - TODO confirm)
            temp_log_file_content = list()

            # variable to know whether the matlab file was accessed correctly
            matlab_access = True

            # load matlab file content
            try:
                matlab_content = sio.loadmat(matlab_file)
            except ValueError as v_error_msg:
                error_msg = 'File {0} cannot be accessed\n {1}'.format(
                    matlab_file_name, v_error_msg)
                error_message_func(logger=logger,
                                   error_message=error_msg,
                                   debugging=debugging)
                matlab_access = False

            if matlab_access:
                msg = 'Matlab content has been loaded'
                logger.getLogger('tab.regular').info(msg)

                if not ('tree' in matlab_content):
                    error_msg = 'Fatal error. tree structure does not exists.'
                    error_message_func(error_message=error_msg,
                                       debugging=debugging,
                                       logger=logger)

                else:
                    printout(message='\ttree structure exists', verbose=True)

                # this is temporary because if the specific file has no errors then no information
                # about the file will be written
                # (re-initialised here; nothing is appended to the list between its first
                # creation above and this point)
                temp_log_file_content = list()

                # tree2 contains the sensor data
                tree2 = True
                if not ('tree2' in matlab_content):
                    error_msg = 'Fatal error. tree2 structure does not exists.'
                    error_message_func(error_message=error_msg,
                                       debugging=debugging,
                                       logger=logger)
                    tree2 = False

                else:
                    print '\ttree2 structure exists'

                # markerExtract contains the label data
                marker_extract = True
                # check for markerExtract
                if not ('markerExtract' in matlab_content):
                    error_msg = 'Fatal error. MarkerExtract structure does not exists.\n'
                    error_message_func(error_message=error_msg,
                                       debugging=debugging,
                                       logger=logger)
                    marker_extract = False
                    if not debugging:
                        raise ValueError(error_msg)
                else:
                    printout(message='\tMarkerExtract structure exists',
                             verbose=True)

                # structure where the labels are stored
                if marker_extract:

                    # derive the prefix every label of this file is expected to carry from the
                    # file name: 'P_'/'N_' for (non)paretic files, 'R_'/'L_' otherwise
                    if 'paretic' in matlab_file_name:
                        if '_paretic_' in matlab_file_name:
                            right_left_paretic_nonparetic_hand_expectation = 'P_'
                        elif '_nonparetic_' in matlab_file_name:
                            right_left_paretic_nonparetic_hand_expectation = 'N_'
                        else:
                            print 'Failed to distinguished between paretic or nonparetic patient.'
                            exit(1)

                    else:
                        if '_r_' in matlab_file_name:
                            right_left_paretic_nonparetic_hand_expectation = 'R_'
                        elif '_l_' in matlab_file_name:
                            right_left_paretic_nonparetic_hand_expectation = 'L_'
                        else:
                            print 'Failed to distinguished between right or left hand patient.'
                            exit(1)

                    # get label data
                    printout(message='\tAccessing MarkerExtract data',
                             verbose=True)
                    # row_data[0][0] = label
                    # row_data[1][0][0] = time step
                    data_array = matlab_content['markerExtract']

                    # used for checking incoming labels: rows are expected to alternate between
                    # a '_B' (begin) label and its matching '_E' (end) label
                    start_flag = True
                    end_flag = False
                    wrong_begin_end_label = False
                    previous_label = ''
                    expected_label = ''

                    # used to overcome index roadblocks
                    first_pass = True

                    # keep track of new labels
                    new_label_list = list()

                    # values corresponding labels
                    last_timestep = -1

                    # variable to store labels
                    label_list = list()

                    # ignored list indices
                    ignore_index_list = list()

                    # total number of steps recorded by tree structure
                    total_number_timesteps = \
                        matlab_content['tree']['subject'][0][0]['frames'][0][0]['frame'][0][0]['index'][0][-1][0][0]

                    msg = '\tTotal time step size for tree structure:{0}'.format(
                        total_number_timesteps)
                    printout(message=msg, verbose=True)

                    printout(message='\ttraversing data array', verbose=True)

                    # index of the last row of markerExtract
                    total_number_labels = len(data_array) - 1
                    # READING FILE
                    # loop through each row in the markerExtract file
                    for current_row_number, data in enumerate(data_array):

                        if total_number_timesteps < current_row_number:
                            error_msg = 'mismatch between number of time steps of tree and markerExtract'
                            error_message_func(line=current_row_number,
                                               error_message=error_msg,
                                               debugging=debugging,
                                               logger=temp_log_file_content)

                        # check for label
                        try:
                            # read label and convert it upper case
                            current_label = str(data[0][0]).upper()

                        # if empty row_value
                        # NOTE(review): unlike the IndexError branch below, this branch neither
                        # switches the flags nor skips the row, so current_label keeps the value of
                        # the previous row (or is unbound on the first row) - confirm intent
                        except ValueError:
                            error_message_func(line=current_row_number,
                                               error_message='missing label',
                                               debugging=debugging,
                                               logger=temp_log_file_content,
                                               label='')
                        except IndexError:
                            error_msg = 'Failed to get label. Probably empty cell []'
                            error_message_func(line=current_row_number,
                                               error_message=error_msg,
                                               debugging=debugging,
                                               logger=temp_log_file_content,
                                               label='')
                            # switch flags
                            start_flag = not start_flag
                            end_flag = not end_flag
                            continue

                        # check for time step
                        try:
                            # read the time step
                            current_timestep = data[1][0][0].astype(int)

                        # if empty row_value
                        except ValueError:
                            error_message_func(
                                line=current_row_number,
                                error_message='missing time step',
                                label='',
                                debugging=debugging,
                                logger=temp_log_file_content)

                            # switch flags
                            start_flag = not start_flag
                            end_flag = not end_flag
                            continue

                        msg = '\t\tdata cell information: row={0} label={1} timestep={2}'.format(
                            current_row_number + 1, current_label,
                            current_timestep)
                        printout(message=msg, verbose=True)

                        # remove space in the label
                        if ' ' in current_label:
                            error_message_func(line=current_row_number,
                                               error_message='extra space',
                                               debugging=debugging,
                                               logger=temp_log_file_content,
                                               label=current_label)

                            # removed space in label
                            current_label = str(current_label).replace(" ", "")

                        # labels other than the IGNORE markers must contain the expected prefix;
                        # when they do not, the error is reported and the first two characters of
                        # the label are overwritten with the expected prefix
                        if not (right_left_paretic_nonparetic_hand_expectation in current_label) and \
                                not ('IGNORE_B' in current_label) and not ('IGNORE_E' in current_label):
                            error_msg = 'Expecting label to start with \'' + \
                                        right_left_paretic_nonparetic_hand_expectation + '\''
                            error_message_func(line=current_row_number,
                                               label=current_label,
                                               error_message=error_msg,
                                               debugging=debugging,
                                               logger=temp_log_file_content)
                            tmp_label = list(current_label)
                            tmp_label[
                                0:
                                2] = right_left_paretic_nonparetic_hand_expectation
                            current_label = ("".join(tmp_label)).upper()

                        if right_left_paretic_nonparetic_hand_expectation in current_label:
                            tmp_label = list(current_label)
                            # removes 'R, L, N or P' in the label in order to find the label in the label class
                            # (only the first character is dropped; the leading '_' is kept)
                            current_label = "".join(tmp_label[1:])

                        # check timesteps are increasing
                        if last_timestep > current_timestep and not first_pass:
                            error_msg = 'timestep=' + str(current_timestep + 3) + ' Expected timestep > ' + \
                                        str(last_timestep + 3)
                            error_message_func(line=current_row_number,
                                               label=current_label,
                                               error_message=error_msg,
                                               debugging=debugging,
                                               logger=temp_log_file_content)

                        # check if wrong '_B' or '_E' label was previously encountered
                        # if a wrong label was seen, then forget the expected label and start again
                        if wrong_begin_end_label:
                            if '_B' in current_label:
                                # switch flags
                                start_flag = True
                                end_flag = False
                            elif '_E' in current_label:
                                # switch flags
                                start_flag = False
                                end_flag = True
                            else:
                                error_msg = 'Wrong label suffix'
                                # NOTE(review): this call passes logger=logger while the other
                                # calls inside this loop pass temp_log_file_content - confirm
                                error_message_func(line=current_row_number,
                                                   label=current_label,
                                                   error_message=error_msg,
                                                   debugging=debugging,
                                                   logger=logger)

                            wrong_begin_end_label = False

                        # start of activity
                        if start_flag:
                            # make sure the 'Beginning' label exists
                            if not ('_B' in current_label):
                                if previous_label:
                                    error_msg = 'Expecting label ending in \'_B\' since the last label was \'' + \
                                                previous_label + '\''
                                else:
                                    error_msg = 'Expecting label ending in \'_B\''
                                # NOTE(review): logger=logger here as well, unlike the matching
                                # '_E' check below which uses temp_log_file_content - confirm
                                error_message_func(line=current_row_number,
                                                   label=current_label,
                                                   error_message=error_msg,
                                                   debugging=debugging,
                                                   logger=logger)
                                wrong_begin_end_label = True

                            else:

                                # reduce the starting position by 3 (model's specifications)
                                current_timestep -= 3

                                # For a '_B' label, the shifted timestep has to be equal to the (shifted)
                                # timestep of the previous label. For the error message, 3 is added back to
                                # both values because of the requirements of the matlab file (project specific)
                                if last_timestep != current_timestep and not first_pass:
                                    error_msg = 'timestep=' + str(current_timestep + 3) + ' Expected timestep >= ' + \
                                                str(last_timestep + 3)
                                    error_message_func(
                                        line=current_row_number,
                                        label=current_label,
                                        error_message=error_msg,
                                        debugging=debugging,
                                        logger=temp_log_file_content)

                                # error check
                                # create a new label in order to compare it to the next label
                                temporary_variable = list(current_label)
                                temporary_variable[-1] = 'E'
                                # store label for future comparison
                                expected_label = (
                                    "".join(temporary_variable)).upper()

                        # '_E' label
                        elif end_flag:

                            # reduce the starting position by 2 (model's specifications) and in order to account
                            # for python not including the last index
                            current_timestep -= 2

                            # make sure the 'Ending' label exists
                            # compare current label to the expected one (obtained from changing the previous/start label)
                            if not ('_E' in current_label
                                    ) or current_label != expected_label:
                                if previous_label:
                                    error_msg = 'Expecting label ending in \'_E\' since the last label was \'' + \
                                                previous_label + '\''
                                else:
                                    error_msg = 'Expecting label ending in \'_E\''
                                error_message_func(
                                    line=current_row_number,
                                    label=current_label,
                                    error_message=error_msg,
                                    debugging=debugging,
                                    logger=temp_log_file_content)

                                wrong_begin_end_label = True

                            # need to get the label index based on the '_B' label in the motion_class.label list
                            tmp_var = list(current_label)
                            tmp_var[-1] = 'B'
                            new_label = ("".join(tmp_var)).upper()
                            # check if the motion exists
                            if not (new_label in motion_class.labels):
                                error_msg = 'Unknown label'
                                error_message_func(
                                    line=current_row_number,
                                    label=current_label,
                                    error_message=error_msg,
                                    debugging=debugging,
                                    logger=temp_log_file_content)

                                # keep record of new labels
                                new_label_list.append(current_label)

                                # print 'Finishing program. Unknown label.'
                                # always finish the program
                                # exit(1)

                            # labels are only collected outside of debugging mode
                            if not debugging:
                                if 'IGNORE' in current_label:
                                    current_class_label_index = motion_class.labels.index(
                                        current_label)
                                else:
                                    try:
                                        # instead of treating _T_A0A1_E and _T_B2B1_E differently, treat them as _T_E
                                        # i.e. as a compact label
                                        splitted_label = current_label.split(
                                            "_")[1]
                                        # check if * or non-characters in the label
                                        remove_non_characters = re.compile(
                                            '[^a-zA-Z]')
                                        splitted_label = remove_non_characters.sub(
                                            '', splitted_label)
                                        # get index of compact label
                                        current_class_label_index = motion_class.compact_list.index(
                                            splitted_label)

                                    except ValueError:
                                        # NOTE(review): the message has no '{0}' placeholder, so the
                                        # .format() argument is not shown in the text
                                        msg = 'Error: label was not found in the compact label list'.format(
                                            current_label)
                                        error_message_func(
                                            line=current_row_number,
                                            error_message=msg,
                                            debugging=debugging,
                                            label=current_label,
                                            logger=temp_log_file_content)

                                # provide the same labels to multiple time steps for hmm algorithm
                                # NOTE(review): after the ValueError above, current_class_label_index
                                # still holds the value of an earlier row (or is unbound if no earlier
                                # row assigned it) - confirm whether the row should be skipped instead
                                label_range = current_timestep - last_timestep
                                label_list.extend([current_class_label_index] *
                                                  label_range)

                        else:
                            error_msg = 'Error while changing start and end checks'
                            error_message_func(line=current_row_number,
                                               error_message=error_msg,
                                               debugging=debugging,
                                               label='',
                                               logger=temp_log_file_content)

                        # the first processed row is expected to end up at 0 once shifted, i.e. a
                        # '_B' row whose raw time step is 3
                        if first_pass:
                            if current_timestep != 0:
                                error_msg = 'Time step does not start at 3'
                                error_message_func(
                                    line=current_row_number,
                                    label=current_label,
                                    error_message=error_msg,
                                    debugging=debugging,
                                    logger=temp_log_file_content)

                        # store label for future comparison
                        previous_label = current_label

                        # switch flags
                        start_flag = not start_flag
                        end_flag = not end_flag
                        first_pass = False

                        # check for multiple motions per label
                        label_used = [
                            current_label
                            for pMotions in motion_class.possible_motions
                            if (pMotions in current_label)
                        ]
                        if len(label_used) > 1:
                            error_msg = 'Error: More than one motion in the same label'
                            error_message_func(line=current_row_number,
                                               label=current_label,
                                               error_message=error_msg,
                                               debugging=debugging,
                                               logger=temp_log_file_content)

                        # check for ignored labels/timesteps
                        # the [begin, end] pair (shifted timesteps) is later handed to
                        # extract_data_and_save_to_file as ignored_indices
                        if 'IGNORE_E' in current_label:
                            ignore_index_list.append(
                                [last_timestep, current_timestep])

                        last_timestep = current_timestep

                        # checks performed only on the last row of markerExtract
                        if total_number_labels == current_row_number:

                            # check last label ended in a '_E' label
                            if '_E' not in current_label:
                                error_msg = 'last label in markerextract is not a \'_E\' label'
                                error_message_func(
                                    line=current_row_number,
                                    error_message=error_msg,
                                    debugging=debugging,
                                    logger=temp_log_file_content)

                            # check size of labels and size dataset
                            if (total_number_timesteps + 1) != last_timestep:
                                error_msg = 'mismatch between number of time steps of tree ({0}) and labels in the ' \
                                            'markerExtract ({1})'.format(total_number_timesteps + 1, last_timestep)
                                error_message_func(
                                    error_message=error_msg,
                                    debugging=debugging,
                                    logger=temp_log_file_content)

                    # sensor data is only stored outside of debugging mode, when extraction was
                    # requested and the 'tree2' structure is present
                    if not debugging and extract and tree2:

                        label_array = np.array(label_list)

                        msg = '\tObtaining sensor data from file {0}'.format(
                            matlab_file_name)
                        printout(message=msg, verbose=True)
                        # extracting sensors' data
                        extract_data_and_save_to_file(
                            labels_array=label_array,
                            ignored_indices=ignore_index_list,
                            dataset=matlab_content['tree2'],
                            motion_class=motion_class,
                            current_file_name=matlab_file_name,
                            outfile_object=out_file)

                        msg = '\tFinished storing sensor data for file {0}'.format(
                            matlab_file_name)
                        printout(message=msg, verbose=True)

            # write the errors collected for this file (if any) to the output file
            if len(temp_log_file_content) != 0:

                if first_pass_ever:
                    file_properties.output_file_object.write(
                        'List of errors: \n\n')
                    first_pass_ever = False

                if first_pass_activity:
                    # print activity with error
                    file_properties.output_file_object.write(
                        'activity: {0}\n\n'.format(activity))
                    first_pass_activity = False

                # print the filename
                file_properties.output_file_object.write(matlab_file_name)
                file_properties.output_file_object.write('\n')
                # loop through the errors and print them
                for log_line in temp_log_file_content:
                    file_properties.output_file_object.write(log_line)

                file_properties.output_file_object.write('\n')

                # NOTE(review): new_label_list is only re-created when 'markerExtract' exists, so
                # for a later file it may still hold the labels of a previous file; the NameError
                # guard only covers the case where it was never created at all
                try:
                    if len(new_label_list) != 0:
                        file_properties.output_file_object.write(
                            '\tSet of new labels: \n\n')
                        for new_labels in new_label_list:
                            file_properties.output_file_object.write(
                                '\t' + new_labels + '\n')

                        file_properties.output_file_object.write('\n')

                # new_label_list was not defined
                except NameError:
                    continue

    if not extract:
        file_properties.output_file_object.write(
            'Done checking matlab files\n')
Esempio n. 15
0
def forwarding_filters():
    """
    interactively ask the user which recordings should be selected

    Three questions are asked, each one repeated until a valid answer is given
    (answers are case-insensitive):
        1. right or left arm
        2. paretic, nonparetic (experimental group) or just 'enter' (control group)
        3. active or nonactive - only asked when a group was chosen in question 2

    :return: (tuple) leftright_arm ('_l_' or '_r_'), specific_side ('' when no
             group was chosen, otherwise e.g. '_paretic_active_')
    """

    # accepted answers -> file name token of the arm
    arm_tokens = {
        'L': '_l_',
        'LEFT': '_l_',
        'R': '_r_',
        'RIGHT': '_r_',
    }
    while True:
        answer = raw_input('right(r) or left(l): ').upper()
        if answer in arm_tokens:
            leftright_arm = arm_tokens[answer]
            break
        printout(message='No side specified. Please specified a side.',
                 verbose=True)

    # accepted answers -> experimental group ('' stands for the control group)
    group_names = {
        'P': 'paretic',
        'PARETIC': 'paretic',
        'N': 'nonparetic',
        'NONPARETIC': 'nonparetic',
        '': '',
    }
    while True:
        answer = raw_input('paretic(p), non-paretic(n) or neither(\'enter\'): ').upper()
        if answer in group_names:
            pareticnonparetic = group_names[answer]
            break
        printout(message='Wrong option selected.', verbose=True)

    # without a group there is nothing left to ask
    specific_side = ''
    if pareticnonparetic not in ('paretic', 'nonparetic'):
        return leftright_arm, specific_side

    # the question text and the resulting tokens depend on the chosen group
    activity_prompts = {
        'paretic': 'active(a) or non-active(n): ',
        'nonparetic': 'active or non-active: ',
    }
    side_tokens = {
        'paretic': {
            'A': '_paretic_active_',
            'ACTIVE': '_paretic_active_',
            'N': '_paretic_nonactive_',
            'NONACTIVE': '_paretic_nonactive_',
        },
        'nonparetic': {
            'A': '_nonparetic_active_',
            'ACTIVE': '_nonparetic_active_',
            'N': '_nonparetic_nonactive_',
            'NONACTIVE': '_nonparetic_nonactive_',
        },
    }
    question = activity_prompts[pareticnonparetic]
    valid_answers = side_tokens[pareticnonparetic]
    while True:
        activenonactive = raw_input(question).upper()
        if activenonactive in valid_answers:
            specific_side = valid_answers[activenonactive]
            break
        printout(message='Wrong option selected.', verbose=True)

    return leftright_arm, specific_side
Esempio n. 16
0
        print '9: Exit'
        print ''

        try:
            selected_option = int(raw_input('Select an option: '))

            if selected_option == 1:
                ml_algorithm('GHMM')
            elif selected_option == 2:
                ml_algorithm('GMMHMM')
            elif selected_option == 3:
                check_matlab()
            elif selected_option == 4:
                convert_featurize_matlab('extract')
            elif selected_option == 5:
                convert_featurize_matlab('featurize')
            elif selected_option == 6:
                move_matlab()
            elif selected_option == 7:
                ml_algorithm('Logistic Regression')
            elif selected_option == 8:
                ml_algorithm('LSTM')
            elif selected_option == 9:
                exit_program()
            else:
                printout(message='Wrong option selected.', verbose=True)

        except ValueError as error_message:
            logging.getLogger('regular').error(error_message)
            raise ValueError(error_message)