Code Example #1
import os
import pickle
import sys

import numpy as np

# The helpers (csv_reader, divide_x_y, select_columns_opp, normalize,
# opp_sliding_window) and the module-level constants (FOLDER_PATH,
# NUM_CLASSES, SCENARIO, annotator, labels_persons, persons, repetition)
# are assumed to be defined elsewhere in the module these snippets come from.


def generate_data(ids,
                  sliding_window_length,
                  sliding_window_step,
                  data_dir=None,
                  half=False,
                  identity_bool=False,
                  usage_modus='train'):
    '''
    Creates a pickled file for each of the sequences extracted from a
    recording, following a sliding-window approach.

    Stores the sequences to data_dir; returns nothing.

    @param ids: ids for train, val or test
    @param sliding_window_length: length of window for segmentation
    @param sliding_window_step: step between windows for segmentation
    @param data_dir: path to dir where files will be stored
    @param half: if True, downsample the recordings by a factor of two
    @param identity_bool: if True, split the recordings per subject for identity experiments
    @param usage_modus: 'train', 'val' or 'test'
    '''

    if identity_bool:
        if usage_modus == 'train':
            recordings = ['R{:02d}'.format(r) for r in range(1, 21)]
        elif usage_modus == 'val':
            recordings = ['R{:02d}'.format(r) for r in range(21, 26)]
        elif usage_modus == 'test':
            recordings = ['R{:02d}'.format(r) for r in range(26, 31)]
        else:
            raise ValueError("usage_modus must be 'train', 'val' or 'test'")
    else:
        recordings = ['R{:02d}'.format(r) for r in range(1, 31)]

    counter_seq = 0
    hist_classes_all = np.zeros(NUM_CLASSES)

    for P in persons:
        if P not in ids:
            print("\nPerson {} not in expected ids".format(P))
        else:
            if P == 'S11':
                if identity_bool:
                    if usage_modus == 'train':
                        recordings = [
                            'R{:02d}'.format(r) for r in range(1, 10)
                        ]
                    elif usage_modus == 'val':
                        recordings = [
                            'R{:02d}'.format(r) for r in range(10, 12)
                        ]
                    elif usage_modus == 'test':
                        recordings = [
                            'R{:02d}'.format(r) for r in range(12, 15)
                        ]
                else:
                    recordings = ['R{:02d}'.format(r) for r in range(1, 31)]
            elif P == 'S12':
                if identity_bool:
                    if usage_modus == 'train':
                        recordings = [
                            'R{:02d}'.format(r) for r in range(1, 25)
                        ]
                    elif usage_modus == 'val':
                        recordings = [
                            'R{:02d}'.format(r) for r in range(25, 28)
                        ]
                    elif usage_modus == 'test':
                        recordings = [
                            'R{:02d}'.format(r) for r in range(28, 31)
                        ]
                else:
                    recordings = ['R{:02d}'.format(r) for r in range(1, 31)]
            else:
                if identity_bool:
                    if usage_modus == 'train':
                        recordings = [
                            'R{:02d}'.format(r) for r in range(1, 21)
                        ]
                    elif usage_modus == 'val':
                        recordings = [
                            'R{:02d}'.format(r) for r in range(21, 26)
                        ]
                    elif usage_modus == 'test':
                        recordings = [
                            'R{:02d}'.format(r) for r in range(26, 31)
                        ]
                else:
                    recordings = ['R{:02d}'.format(r) for r in range(1, 31)]
            for R in recordings:
                if P in ["S01", "S02", "S03", "S04", "S05", "S06"]:
                    S = "L01"
                else:
                    S = SCENARIO[R]
                for N in repetition:
                    annotator_file = annotator[P]
                    if P == 'S07' and SCENARIO[R] == 'L01':
                        annotator_file = "A03"
                    if P == 'S14' and SCENARIO[R] == 'L03':
                        annotator_file = "A19"
                    if P == 'S11' and SCENARIO[R] == 'L01':
                        annotator_file = "A03"
                    if P == 'S11' and R in [
                            'R04', 'R08', 'R09', 'R10', 'R11', 'R12', 'R13',
                            'R15'
                    ]:
                        annotator_file = "A02"
                    if P == 'S13' and R in ['R28']:
                        annotator_file = "A01"
                    if P == 'S13' and R in ['R29', 'R30']:
                        annotator_file = "A11"
                    if P == 'S09' and R in ['R28', 'R29']:
                        annotator_file = "A01"
                    if P == 'S09' and R in ['R21', 'R22', 'R23', 'R24', 'R25']:
                        annotator_file = "A11"

                    file_name_norm = "{}/{}_{}_{}_{}_{}_norm_data.csv".format(
                        P, S, P, R, annotator_file, N)
                    file_name_label = "{}/{}_{}_{}_{}_{}_labels.csv".format(
                        P, S, P, R, annotator_file, N)

                    try:
                        #getting data
                        data = csv_reader.reader_data(FOLDER_PATH +
                                                      file_name_norm)
                        print("\nFiles loaded in modus {}\n{}".format(
                            usage_modus, file_name_norm))
                        data = select_columns_opp(data)
                        print("Columns selected")
                    except Exception:
                        print("\n In generating data, No file {}".format(
                            FOLDER_PATH + file_name_norm))
                        continue

                    try:
                        #Getting labels and attributes
                        labels = csv_reader.reader_labels(FOLDER_PATH +
                                                          file_name_label)
                        class_labels = np.where(labels[:, 0] == 7)[0]

                        # Deleting rows containing the "none" class
                        data = np.delete(data, class_labels, 0)
                        labels = np.delete(labels, class_labels, 0)

                        if half:
                            # Downsample by a factor of two
                            downsampling = range(0, data.shape[0], 2)
                            data = data[downsampling]
                            labels = labels[downsampling]
                        data_t, data_x, data_y = divide_x_y(data)
                        del data_t

                    except Exception:
                        print(
                            "\n In generating data, Error getting the data {}".
                            format(FOLDER_PATH + file_name_norm))
                        continue

                    try:
                        # checking if annotations are consistent
                        data_x = normalize(data_x)
                        if np.sum(data_y == labels[:, 0]) == data_y.shape[0]:

                            # Sliding window approach

                            print("Starting sliding window")
                            X, y, y_all = opp_sliding_window(
                                data_x,
                                labels.astype(int),
                                sliding_window_length,
                                sliding_window_step,
                                label_pos_end=False)
                            print("Windows are extracted")

                            # Statistics
                            hist_classes = np.bincount(y[:, 0],
                                                       minlength=NUM_CLASSES)
                            hist_classes_all += hist_classes
                            print("Number of seq per class {}".format(
                                hist_classes_all))

                            for f in range(X.shape[0]):
                                try:

                                    sys.stdout.write('\r' +
                                                     'Creating sequence file '
                                                     'number {} with id {}'.
                                                     format(f, counter_seq))
                                    sys.stdout.flush()

                                    # print "Creating sequence file number {} with id {}".format(f, counter_seq)
                                    seq = np.reshape(X[f],
                                                     newshape=(1, X.shape[1],
                                                               X.shape[2]))
                                    seq = np.require(seq, dtype=np.float64)

                                    obj = {
                                        "data": seq,
                                        "label": y[f],
                                        "labels": y_all[f],
                                        "identity": labels_persons[P]
                                    }
                                    # Distinct name so the window index f is
                                    # not shadowed by the file handle
                                    seq_path = os.path.join(
                                        data_dir,
                                        'seq_{0:06}.pkl'.format(counter_seq))
                                    with open(seq_path, 'wb') as f_pkl:
                                        pickle.dump(
                                            obj,
                                            f_pkl,
                                            protocol=pickle.HIGHEST_PROTOCOL)

                                    counter_seq += 1

                                except Exception:
                                    raise RuntimeError('Error adding the seq')

                            print("\nCorrect data extraction from {}".format(
                                FOLDER_PATH + file_name_norm))

                            del data
                            del data_x
                            del data_y
                            del X
                            del labels
                            del class_labels

                        else:
                            print("\nInconsistent annotation in {}".format(
                                file_name_norm))
                            continue

                    except Exception:
                        print("\n In generating data, Error processing {}".format(
                            FOLDER_PATH + file_name_norm))

    return
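
The segmentation helper `opp_sliding_window` is not shown in this listing. Below is a minimal sketch of the kind of segmentation it performs, assuming the per-window class label is taken as the majority vote over the frames (the `label_pos_end=False` argument above suggests the label is not simply read from the last frame); the real helper also returns the per-frame labels `y_all`:

import numpy as np

def sliding_window_sketch(data_x, labels, window_length, step):
    # data_x: [T, C] sensor matrix; labels: [T] per-frame class labels.
    # Returns windows [N, window_length, C] and one majority label per window.
    windows, window_labels = [], []
    for start in range(0, data_x.shape[0] - window_length + 1, step):
        windows.append(data_x[start:start + window_length])
        # Assumption: the window label is its most frequent frame class
        counts = np.bincount(labels[start:start + window_length])
        window_labels.append(counts.argmax())
    return np.asarray(windows), np.asarray(window_labels)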
Code Example #2
# Assumes the same imports and module-level constants as Code Example #1.
def compute_max_min(ids):
    '''
    Compute the max and min values for normalizing the data.

    Prints the max and min values. These values are computed only once, and
    the resulting max/min values are then stored as constants.

    @param ids: ids for train
    '''

    recordings = ['R{:02d}'.format(r) for r in range(1, 31)]

    max_values_total = np.zeros(132)
    min_values_total = np.ones(132) * 1000000
    for P in persons:
        if P in ids:
            for R in recordings:
                if P in ["S01", "S02", "S03", "S04", "S05", "S06"]:
                    S = "L01"
                else:
                    S = SCENARIO[R]
                for N in repetition:
                    annotator_file = annotator[P]
                    if P == 'S07' and SCENARIO[R] == 'L01':
                        annotator_file = "A03"
                    if P == 'S14' and SCENARIO[R] == 'L03':
                        annotator_file = "A19"
                    if P == 'S11' and SCENARIO[R] == 'L01':
                        annotator_file = "A03"
                    if P == 'S11' and R in [
                            'R04', 'R08', 'R09', 'R10', 'R11', 'R12', 'R13',
                            'R15'
                    ]:
                        annotator_file = "A02"
                    if P == 'S13' and R in ['R28']:
                        annotator_file = "A01"
                    if P == 'S13' and R in ['R29', 'R30']:
                        annotator_file = "A11"
                    if P == 'S09' and R in ['R28', 'R29']:
                        annotator_file = "A01"
                    if P == 'S09' and R in ['R21', 'R22', 'R23', 'R24', 'R25']:
                        annotator_file = "A11"

                    file_name_norm = "{}/{}_{}_{}_{}_{}_norm_data.csv".format(
                        P, S, P, R, annotator_file, N)

                    try:
                        data = csv_reader.reader_data(FOLDER_PATH +
                                                      file_name_norm)
                        print("Files loaded")

                        data_t, data_x, data_y = divide_x_y(data)
                        del data_t
                        del data_y

                        max_values = np.max(data_x, axis=0)
                        min_values = np.min(data_x, axis=0)

                        max_values_total = np.max(
                            (max_values, max_values_total), axis=0)
                        min_values_total = np.min(
                            (min_values, min_values_total), axis=0)

                    except Exception:
                        print("No file {}".format(FOLDER_PATH +
                                                  file_name_norm))

    print("Max values \n{}".format(max_values_total))
    print("Min values \n{}".format(min_values_total))

    return
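
The `normalize` helper used in Code Example #1 is not shown either. Given that this function prints per-channel max and min values meant to be stored as constants, a plausible min-max scaling sketch (an assumption, not the confirmed implementation) is:

import numpy as np

# Hypothetical constants: in practice, the values printed by compute_max_min
MAX_VALUES = np.ones(132)
MIN_VALUES = np.zeros(132)

def normalize_sketch(data_x, eps=1e-8):
    # Scale each of the 132 channels to [0, 1] with the precomputed extrema
    scaled = (data_x - MIN_VALUES) / (MAX_VALUES - MIN_VALUES + eps)
    # Clip recordings that exceed the extrema seen during training
    return np.clip(scaled, 0.0, 1.0)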
Code Example #3
# Assumes the same imports and module-level constants as Code Example #1.
def compute_max_min(ids):
    '''
    Compute the max, min, mean and std values for normalizing the data.

    Prints the max, min, mean and std values. These values are computed only
    once, and the resulting values are then stored as constants.

    @param ids: ids for train
    '''

    FOLDER_PATH = "path_to_theLARa_Virtual_dataset/"

    # Recording names, refer to the naming of the files in LARa dataset
    recordings = ['R{:02d}'.format(r) for r in range(1, 31)]

    max_values_total = np.zeros((126))
    min_values_total = np.ones((126)) * 1000000

    accumulator_mean_measurements = np.empty((0, 126))
    accumulator_std_measurements = np.empty((0, 126))

    for P in persons:
        if P in ids:
            accumulator_measurements = np.empty((0, 126))
            for r, R in enumerate(recordings):
                # All of these if-cases are coming due to the naming of the recordings in the data.
                # Not all the subjects have the same
                # annotated recordings, nor annotators, nor annotations runs, nor scenarios.
                # these will include all of the recordings for the subjects
                if P in ["P01", "P02", "P03", "P04", "P05", "P06"]:
                    S = "S01"
                else:
                    S = SCENARIO[r]
                for N in repetition:
                    annotator_file = annotator[P]
                    if P == 'P07' and SCENARIO[r] == 'S01':
                        annotator_file = "A03"
                    if P == 'P14' and SCENARIO[r] == 'S03':
                        annotator_file = "A19"
                    if P == 'P11' and SCENARIO[r] == 'S01':
                        annotator_file = "A03"
                    if P == 'P11' and R in ['R04', 'R08', 'R09', 'R10', 'R11', 'R12', 'R13', 'R15']:
                        annotator_file = "A02"
                    file_name_norm = "{}/{}_{}_{}_{}_{}_der_data.csv".format(P, S, P, R, annotator_file, N)

                    try:
                        data = csv_reader.reader_data(FOLDER_PATH + file_name_norm)
                        print("Files loaded")
                    except Exception:
                        print("No file {}".format(FOLDER_PATH + file_name_norm))
                        continue

                    try:
                        print("Getting the max and min")
                        data_t, data_x, data_y = divide_x_y(data)
                        del data_t
                        del data_y

                        max_values = np.max(data_x, axis=0)
                        min_values = np.min(data_x, axis=0)

                        max_values_total = np.max((max_values, max_values_total), axis=0)
                        min_values_total = np.min((min_values, min_values_total), axis=0)

                        accumulator_measurements = np.append(accumulator_measurements, data_x, axis=0)
                        print("Accumulated")
                    except Exception:
                        print("Error getting the max and min from {}".format(FOLDER_PATH + file_name_norm))

            mean_values = np.mean(accumulator_measurements, axis=0)
            std_values = np.std(accumulator_measurements, axis=0)

            accumulator_mean_measurements = np.append(accumulator_mean_measurements, [mean_values], axis=0)
            accumulator_std_measurements = np.append(accumulator_std_measurements, [std_values], axis=0)

    try:
        mean_values = np.mean(accumulator_mean_measurements, axis=0)
        # Note: the std is taken as the largest per-subject std per channel
        std_values = np.max(accumulator_std_measurements, axis=0)
        mean_values = np.around(mean_values, decimals=4)
        std_values = np.around(std_values, decimals=5)
        print("Max values \n{}".format(max_values_total))
        print("Min values \n{}".format(min_values_total))
        print("Mean values \n{}".format(mean_values))
        print("Std values \n{}".format(std_values))
    except Exception:
        print("Error computing statistics")
    return
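
Analogously, `norm_mean_std` in Code Example #4 is not shown; using the mean and std values printed by this function, a standard z-score normalization sketch (again an assumption) would be:

import numpy as np

# Hypothetical constants: in practice, the rounded values printed above
MEAN_VALUES = np.zeros(126)
STD_VALUES = np.ones(126)

def norm_mean_std_sketch(data_x):
    # Standardize each of the 126 channels with the precomputed statistics
    return (data_x - MEAN_VALUES) / STD_VALUES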
Code Example #4
# Assumes the same imports and module-level constants as Code Example #1.
def generate_data(ids, sliding_window_length, sliding_window_step, data_dir=None):
    '''
    Creates a pickled file for each of the sequences extracted from a
    recording, following a sliding-window approach.

    Stores the sequences to data_dir; returns nothing.

    @param ids: ids for train, val or test
    @param sliding_window_length: length of window for segmentation
    @param sliding_window_step: step between windows for segmentation
    @param data_dir: path to dir where files will be stored
    '''


    FOLDER_PATH = '/path_to_LARa_Mocap_for_annotations/'
    folder_derivative = "/path_to_LARa_Mocap_for_annotations/"

    # Recording names, refer to the naming of the files in LARa dataset
    recordings = ['R{:02d}'.format(r) for r in range(1, 31)]

    counter_seq = 0
    hist_classes_all = np.zeros(NUM_CLASSES)

    for P in persons:
        if P not in ids:
            print("\nNo Person in expected IDS {}".format(P))
        else:
            for R in recordings:
                # Selecting the proportions of train, val or test according to the quantity of
                # recordings per subject, as there is not an equal number of recordings per subject;
                # see the dataset for checking the recording files per subject
                if P in ["S01", "S02", "S03", "S04", "S05", "S06"]:
                    S = "L01"
                else:
                    S = SCENARIO[R]
                for N in repetition:
                    annotator_file = annotator[P]
                    if P == 'S07' and SCENARIO[R] == 'L01':
                        annotator_file = "A03"
                    if P == 'S14' and SCENARIO[R] == 'L03':
                        annotator_file = "A19"
                    if P == 'S11' and SCENARIO[R] == 'L01':
                        annotator_file = "A03"
                    if P == 'S11' and R in ['R04', 'R08', 'R09', 'R10', 'R11', 'R12', 'R13', 'R15']:
                        annotator_file = "A02"
                    if P == 'S13' and R in ['R28']:
                        annotator_file = "A01"
                    if P == 'S13' and R in ['R29', 'R30']:
                        annotator_file = "A11"
                    if P == 'S09' and R in ['R28', 'R29']:
                        annotator_file = "A01"
                    if P == 'S09' and R in ['R21', 'R22', 'R23', 'R24', 'R25']:
                        annotator_file = "A11"

                    file_name_norm = "{}/{}_{}_{}_{}_{}_der_data.csv".format(P, S, P, R, annotator_file, N)
                    file_name_label = "{}/{}_{}_{}_{}_{}_labels.csv".format(P, S, P, R, annotator_file, N)

                    try:
                        # getting data
                        data = csv_reader.reader_data(folder_derivative + file_name_norm)
                        print("\nFiles loaded")
                    except Exception:
                        print("\n In generating data, No file {}".format(folder_derivative + file_name_norm))
                        continue

                    try:
                        # Getting labels and attributes
                        labels = csv_reader.reader_labels(FOLDER_PATH + file_name_label)
                        class_labels = np.where(labels[:, 0] == 7)[0]
                        print("\nGet labels")

                        # Deleting rows containing the "none" class
                        data = np.delete(data, class_labels, 0)
                        labels = np.delete(labels, class_labels, 0)
                        print("\nDeleting none rows")

                        # halving the frequency, as Mbientlab or MotionMiners sensors use 100Hz
                        downsampling = range(0, data.shape[0], 2)
                        data = data[downsampling]
                        labels = labels[downsampling]
                        data_t, data_x, data_y = divide_x_y(data)
                        del data_t
                        print("\nDownsampling")

                    except Exception:
                        print("\n In generating data, Error getting the data {}".format(FOLDER_PATH + file_name_norm))
                        continue

                    try:
                        # checking if annotations are consistent
                        data_x = norm_mean_std(data_x)
                        if np.sum(data_y == labels[:, 0]) == data_y.shape[0]:

                            # Sliding window approach
                            print("Starting sliding window")
                            X, y, y_all = opp_sliding_window(data_x, labels.astype(int), sliding_window_length,
                                                             sliding_window_step, label_pos_end=False)
                            print("Windows are extracted")

                            # Statistics
                            hist_classes = np.bincount(y[:, 0], minlength=NUM_CLASSES)
                            hist_classes_all += hist_classes
                            print("Number of seq per class {}".format(hist_classes_all))

                            for f in range(X.shape[0]):
                                try:

                                    sys.stdout.write(
                                        '\r' + 'Creating sequence file number {} with id {}'.format(f, counter_seq))
                                    sys.stdout.flush()

                                    # print "Creating sequence file number {} with id {}".format(f, counter_seq)
                                    seq = np.reshape(X[f], newshape=(1, X.shape[1], X.shape[2]))
                                    seq = np.require(seq, dtype=np.float64)

                                    # Storing the sequences
                                    obj = {"data": seq, "label": y[f], "labels": y_all[f]}
                                    # Distinct name so the window index f is not shadowed by the file handle
                                    seq_path = os.path.join(data_dir, 'seq_{0:06}.pkl'.format(counter_seq))
                                    with open(seq_path, 'wb') as f_pkl:
                                        pickle.dump(obj, f_pkl, protocol=pickle.HIGHEST_PROTOCOL)

                                    counter_seq += 1

                                except Exception:
                                    raise RuntimeError('Error adding the seq')

                            print("\nCorrect data extraction from {}".format(FOLDER_PATH + file_name_norm))

                            del data
                            del data_x
                            del data_y
                            del X
                            del labels
                            del class_labels

                        else:
                            print("\nInconsistent annotation in {}".format(file_name_norm))
                            continue

                    except Exception:
                        print("\n In generating data, Error processing {}".format(FOLDER_PATH + file_name_norm))

    return
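
A usage sketch for this `generate_data` variant; the subject split and window parameters below are illustrative only, and the target directories are assumed to exist:

# Illustrative split: the actual LARa train/val/test partition may differ
train_ids = ["S01", "S02", "S03", "S04", "S05", "S06", "S07", "S08"]
val_ids = ["S09", "S10", "S11"]
test_ids = ["S12", "S13", "S14"]

generate_data(train_ids, sliding_window_length=100, sliding_window_step=12,
              data_dir="sequences_train/")
generate_data(val_ids, sliding_window_length=100, sliding_window_step=12,
              data_dir="sequences_val/")
generate_data(test_ids, sliding_window_length=100, sliding_window_step=12,
              data_dir="sequences_test/")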
Code Example #5
# Assumes the same imports and module-level constants as Code Example #1.
def generate_derivatives(ids):
    '''
    Generate the files containing the derivatives of the sequences, which will
    be called virtual IMUs.

    The function stores files with the derivatives of the MoCAP data for the
    subjects specified by ids, under the same names as the MoCAP recordings,
    keeping the same structure as the LARa dataset.

    @param ids: ids of the subjects for which derivatives will be computed
    '''

    FOLDER_PATH = '/path_to_theLARa_MOCAP_dataset/'
    folder_derivative = "path_to_theLARa_Virtual_dataset/"

    # Recording names, refer to the naming of the files in LARa dataset
    recordings = ['R{:02d}'.format(r) for r in range(1, 31)]

    for P in persons:
        if P not in ids:
            print("\nNo Person in expected IDS {}".format(P))
        else:
            for r, R in enumerate(recordings):
                # All of these if-cases are coming due to the naming of the recordings in the data.
                # Not all the subjects have the same
                # annotated recordings, nor annotators, nor annotations runs, nor scenarios.
                # these will include all of the recordings for the subjects
                if P in ["S01", "S02", "S03", "S04", "S05", "S06"]:
                    S = "L01"
                else:
                    S = SCENARIO[R]
                for N in repetition:
                    annotator_file = annotator[P]
                    if P == "S07" and SCENARIO[R] == "L01":
                        annotator_file = "A03"
                    if P == "S14" and SCENARIO[R] == "L03":
                        annotator_file = "A19"
                    if P == "S11" and SCENARIO[R] == "L01":
                        annotator_file = "A03"
                    if P == "S11" and R in ["R04", "R08", "R09", "R10", "R11", "R12", "R13", "R15"]:
                        annotator_file = "A02"
                    if P == "S13" and R in ["R28"]:
                        annotator_file = "A01"
                    if P == "S13" and R in ["R29", "R30"]:
                        annotator_file = "A11"
                    if P == "S09" and R in ["R28", "R29"]:
                        annotator_file = "A01"
                    if P == "S09" and R in ["R21", "R22", "R23", "R24", "R25"]:
                        annotator_file = "A11"

                    file_name_norm = "{}/{}_{}_{}_{}_{}_norm_data.csv".format(P, S, P, R, annotator_file, N)
                    file_name_derivative = "{}/{}_{}_{}_{}_{}_der_data.csv".format(P, S, P, R, annotator_file, N)

                    try:
                        # getting data
                        data = csv_reader.reader_data(FOLDER_PATH + file_name_norm)
                        print("\nFiles loaded")
                        data = select_columns_opp(data)
                        print("Columns selected")
                    except Exception:
                        print("\n In generating data, selecting Columns\nNo file {}".format(FOLDER_PATH + file_name_norm))
                        continue

                    try:
                        # Interpolating
                        print("Interpolating")
                        data = interpolate(data)
                    except Exception:
                        print("\n In generating data, Error interpolating the data {}".format(FOLDER_PATH + file_name_norm))
                        continue

                    try:
                        print("\nsaving")
                        save_data_csv(data, folder_derivative + file_name_derivative)
                    except Exception:
                        print(
                            "\n In generating data, Error Saving \n"
                            "Error getting the data {}".format(folder_derivative + file_name_derivative))
                        continue
    return
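
The actual derivative computation behind the "virtual IMU" idea happens inside `interpolate` and is not shown here. A minimal finite-difference sketch, assuming pose channels sampled at a fixed rate (the comment in Code Example #4 about halving the frequency to match 100 Hz IMU sensors suggests 200 Hz MoCAP, but this is an assumption):

import numpy as np

def derivative_sketch(data_x, sampling_rate=200.0):
    # Approximate the per-channel first derivative of the MoCAP poses with a
    # forward difference; the original code may interpolate first and use a
    # different scheme.
    deriv = np.diff(data_x, axis=0) * sampling_rate
    # Repeat the last row so the output keeps the input length
    return np.vstack([deriv, deriv[-1:]])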