# Module-level dependencies assumed by this section (they may already be
# imported at the top of the original script): csv_reader, divide_x_y,
# select_columns_opp, normalize, norm_mean_std, opp_sliding_window, interpolate,
# save_data_csv, and the constants NUM_CLASSES, FOLDER_PATH, SCENARIO, persons,
# repetition, annotator and labels_persons are defined elsewhere in the file.
import os
import pickle
import sys

import numpy as np


def generate_data(ids, sliding_window_length, sliding_window_step, data_dir=None,
                  half=False, identity_bool=False, usage_modus='train'):
    '''
    Creates one pickle file per sequence, where the sequences are extracted from
    a recording following a sliding-window approach. The files are stored on
    disk; nothing is returned.

    @param ids: ids for train, val or test
    @param sliding_window_length: length of window for segmentation
    @param sliding_window_step: step between windows for segmentation
    @param data_dir: path to dir where files will be stored
    @param half: if True, downsample the recordings by a factor of two
    @param identity_bool: if True, split the recordings per subject for identity recognition
    @param usage_modus: 'train', 'val' or 'test'
    '''
    if identity_bool:
        if usage_modus == 'train':
            recordings = ['R{:02d}'.format(r) for r in range(1, 21)]
        elif usage_modus == 'val':
            recordings = ['R{:02d}'.format(r) for r in range(21, 26)]
        elif usage_modus == 'test':
            recordings = ['R{:02d}'.format(r) for r in range(26, 31)]
    else:
        recordings = ['R{:02d}'.format(r) for r in range(1, 31)]

    counter_seq = 0
    hist_classes_all = np.zeros(NUM_CLASSES)

    for P in persons:
        if P not in ids:
            print("\nNo Person in expected IDS {}".format(P))
        else:
            # Subjects S11 and S12 have fewer annotated recordings, so their
            # train/val/test splits differ from the default split.
            if P == 'S11':
                if identity_bool:
                    if usage_modus == 'train':
                        recordings = ['R{:02d}'.format(r) for r in range(1, 10)]
                    elif usage_modus == 'val':
                        recordings = ['R{:02d}'.format(r) for r in range(10, 12)]
                    elif usage_modus == 'test':
                        recordings = ['R{:02d}'.format(r) for r in range(12, 15)]
                else:
                    recordings = ['R{:02d}'.format(r) for r in range(1, 31)]
            elif P == 'S12':
                if identity_bool:
                    if usage_modus == 'train':
                        recordings = ['R{:02d}'.format(r) for r in range(1, 25)]
                    elif usage_modus == 'val':
                        recordings = ['R{:02d}'.format(r) for r in range(25, 28)]
                    elif usage_modus == 'test':
                        recordings = ['R{:02d}'.format(r) for r in range(28, 31)]
                else:
                    recordings = ['R{:02d}'.format(r) for r in range(1, 31)]
            else:
                if identity_bool:
                    if usage_modus == 'train':
                        recordings = ['R{:02d}'.format(r) for r in range(1, 21)]
                    elif usage_modus == 'val':
                        recordings = ['R{:02d}'.format(r) for r in range(21, 26)]
                    elif usage_modus == 'test':
                        recordings = ['R{:02d}'.format(r) for r in range(26, 31)]
                else:
                    recordings = ['R{:02d}'.format(r) for r in range(1, 31)]

            for R in recordings:
                if P in ["S01", "S02", "S03", "S04", "S05", "S06"]:
                    S = "L01"
                else:
                    S = SCENARIO[R]
                for N in repetition:
                    # Annotator exceptions: not all recordings of a subject were
                    # labelled by the same annotator.
                    annotator_file = annotator[P]
                    if P == 'S07' and SCENARIO[R] == 'L01':
                        annotator_file = "A03"
                    if P == 'S14' and SCENARIO[R] == 'L03':
                        annotator_file = "A19"
                    if P == 'S11' and SCENARIO[R] == 'L01':
                        annotator_file = "A03"
                    if P == 'S11' and R in ['R04', 'R08', 'R09', 'R10', 'R11', 'R12', 'R13', 'R15']:
                        annotator_file = "A02"
                    if P == 'S13' and R in ['R28']:
                        annotator_file = "A01"
                    if P == 'S13' and R in ['R29', 'R30']:
                        annotator_file = "A11"
                    if P == 'S09' and R in ['R28', 'R29']:
                        annotator_file = "A01"
                    if P == 'S09' and R in ['R21', 'R22', 'R23', 'R24', 'R25']:
                        annotator_file = "A11"

                    file_name_norm = "{}/{}_{}_{}_{}_{}_norm_data.csv".format(P, S, P, R, annotator_file, N)
                    file_name_label = "{}/{}_{}_{}_{}_{}_labels.csv".format(P, S, P, R, annotator_file, N)

                    try:
                        # Getting data
                        data = csv_reader.reader_data(FOLDER_PATH + file_name_norm)
                        print("\nFiles loaded in modus {}\n{}".format(usage_modus, file_name_norm))
                        data = select_columns_opp(data)
                        print("Columns selected")
                    except Exception:
                        print("\n In generating data, No file {}".format(FOLDER_PATH + file_name_norm))
                        continue

                    try:
                        # Getting labels and attributes
                        labels = csv_reader.reader_labels(FOLDER_PATH + file_name_label)
                        class_labels = np.where(labels[:, 0] == 7)[0]

                        # Deleting rows containing the "none" class
                        data = np.delete(data, class_labels, 0)
                        labels = np.delete(labels, class_labels, 0)

                        if half:
                            # Halving the sampling rate
                            downsampling = range(0, data.shape[0], 2)
                            data = data[downsampling]
                            labels = labels[downsampling]
                        data_t, data_x, data_y = divide_x_y(data)
                        del data_t
                    except Exception:
                        print("\n In generating data, Error getting the data {}".format(
                            FOLDER_PATH + file_name_norm))
                        continue

                    try:
                        # Checking if annotations are consistent
                        data_x = normalize(data_x)
                        if np.sum(data_y == labels[:, 0]) == data_y.shape[0]:
                            # Sliding window approach
                            print("Starting sliding window")
                            X, y, y_all = opp_sliding_window(data_x, labels.astype(int),
                                                             sliding_window_length,
                                                             sliding_window_step,
                                                             label_pos_end=False)
                            print("Windows are extracted")

                            # Statistics
                            hist_classes = np.bincount(y[:, 0], minlength=NUM_CLASSES)
                            hist_classes_all += hist_classes
                            print("Number of seq per class {}".format(hist_classes_all))

                            for f in range(X.shape[0]):
                                try:
                                    sys.stdout.write('\r' + 'Creating sequence file '
                                                     'number {} with id {}'.format(f, counter_seq))
                                    sys.stdout.flush()

                                    seq = np.reshape(X[f], newshape=(1, X.shape[1], X.shape[2]))
                                    # np.float is deprecated; np.float64 keeps the intent.
                                    seq = np.require(seq, dtype=np.float64)
                                    obj = {"data": seq, "label": y[f], "labels": y_all[f],
                                           "identity": labels_persons[P]}
                                    # A context manager avoids shadowing the loop
                                    # variable f with the file handle.
                                    with open(os.path.join(data_dir,
                                                           'seq_{0:06}.pkl'.format(counter_seq)),
                                              'wb') as fp:
                                        pickle.dump(obj, fp, protocol=pickle.HIGHEST_PROTOCOL)
                                    counter_seq += 1
                                except Exception:
                                    # Raising a bare string is invalid; raise a proper exception.
                                    raise RuntimeError('\nError adding the seq')

                            print("\nCorrect data extraction from {}".format(FOLDER_PATH + file_name_norm))

                            del data
                            del data_x
                            del data_y
                            del X
                            del labels
                            del class_labels
                        else:
                            print("\nInconsistent annotations in {}".format(file_name_norm))
                            continue
                    except Exception:
                        print("\n In generating data, No file {}".format(FOLDER_PATH + file_name_norm))

    return
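# Usage sketch (not part of the original script): one plausible way to invoke
# generate_data, assuming hypothetical subject splits, window sizes and output
# directories. The real ID lists and paths come from the surrounding pipeline.
def example_generate_sequences():
    base_directory = '/tmp/lara/sequences_{}/'  # hypothetical output root

    # Hypothetical train/val/test partition of the LARa subjects.
    train_ids = ["S01", "S02", "S03", "S04"]
    val_ids = ["S05"]
    test_ids = ["S06"]

    # Example configuration: 100-frame windows with a 12-frame stride.
    generate_data(train_ids, sliding_window_length=100, sliding_window_step=12,
                  data_dir=base_directory.format('train'))
    generate_data(val_ids, sliding_window_length=100, sliding_window_step=12,
                  data_dir=base_directory.format('val'), usage_modus='val')
    generate_data(test_ids, sliding_window_length=100, sliding_window_step=12,
                  data_dir=base_directory.format('test'), usage_modus='test')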
def compute_max_min(ids):
    '''
    Computes the max and min values for normalizing the data and prints them.
    These values are computed only once, and the resulting max/min values are
    then placed as constants in the script.

    @param ids: ids for train
    '''
    recordings = ['R{:02d}'.format(r) for r in range(1, 31)]

    max_values_total = np.zeros(132)
    min_values_total = np.ones(132) * 1000000

    for P in persons:
        if P in ids:
            for R in recordings:
                if P in ["S01", "S02", "S03", "S04", "S05", "S06"]:
                    S = "L01"
                else:
                    # SCENARIO is keyed by the recording name, as in generate_data.
                    S = SCENARIO[R]
                for N in repetition:
                    annotator_file = annotator[P]
                    # Scenario names are 'L01'/'L03' for the S-prefixed subjects,
                    # matching the checks in generate_data above.
                    if P == 'S07' and SCENARIO[R] == 'L01':
                        annotator_file = "A03"
                    if P == 'S14' and SCENARIO[R] == 'L03':
                        annotator_file = "A19"
                    if P == 'S11' and SCENARIO[R] == 'L01':
                        annotator_file = "A03"
                    if P == 'S11' and R in ['R04', 'R08', 'R09', 'R10', 'R11', 'R12', 'R13', 'R15']:
                        annotator_file = "A02"
                    if P == 'S13' and R in ['R28']:
                        annotator_file = "A01"
                    if P == 'S13' and R in ['R29', 'R30']:
                        annotator_file = "A11"
                    if P == 'S09' and R in ['R28', 'R29']:
                        annotator_file = "A01"
                    if P == 'S09' and R in ['R21', 'R22', 'R23', 'R24', 'R25']:
                        annotator_file = "A11"

                    file_name_norm = "{}/{}_{}_{}_{}_{}_norm_data.csv".format(P, S, P, R, annotator_file, N)

                    try:
                        data = csv_reader.reader_data(FOLDER_PATH + file_name_norm)
                        print("Files loaded")

                        data_t, data_x, data_y = divide_x_y(data)
                        del data_t
                        del data_y

                        max_values = np.max(data_x, axis=0)
                        min_values = np.min(data_x, axis=0)

                        max_values_total = np.max((max_values, max_values_total), axis=0)
                        min_values_total = np.min((min_values, min_values_total), axis=0)
                    except Exception:
                        print("No file {}".format(FOLDER_PATH + file_name_norm))

    print("Max values \n{}".format(max_values_total))
    print("Min values \n{}".format(min_values_total))

    return
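# The generate_data function above calls normalize(data_x), which is defined
# elsewhere in the original script. The sketch below shows one plausible
# implementation, assuming per-channel min-max scaling with the constants
# printed by compute_max_min; MAX_VALUES_SKETCH / MIN_VALUES_SKETCH are
# hypothetical placeholder names.
MAX_VALUES_SKETCH = np.ones(132)    # paste the printed max values here
MIN_VALUES_SKETCH = np.zeros(132)   # paste the printed min values here


def normalize_sketch(data):
    # Scale every channel to [0, 1]; a sketch, not the original implementation.
    diff = MAX_VALUES_SKETCH - MIN_VALUES_SKETCH
    diff[diff == 0] = 1.0  # guard against constant channels
    return (np.asarray(data) - MIN_VALUES_SKETCH) / diff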
def compute_max_min(ids):
    '''
    Variant for the LARa Virtual (derivative) dataset: computes the max, min,
    mean and std values for normalizing the data and prints them. These values
    are computed only once, and the resulting values are then placed as
    constants in the script.

    @param ids: ids for train
    '''
    FOLDER_PATH = "path_to_theLARa_Virtual_dataset/"

    # Recording names, referring to the naming of the files in the LARa dataset
    recordings = ['R{:02d}'.format(r) for r in range(1, 31)]

    max_values_total = np.zeros(126)
    min_values_total = np.ones(126) * 1000000

    accumulator_mean_measurements = np.empty((0, 126))
    accumulator_std_measurements = np.empty((0, 126))

    for P in persons:
        if P in ids:
            accumulator_measurements = np.empty((0, 126))
            for R in recordings:
                # All of these if-cases stem from the naming of the recordings in
                # the data: not all subjects have the same annotated recordings,
                # annotators, annotation runs or scenarios. These cases include
                # all of the recordings for the subjects.
                if P in ["P01", "P02", "P03", "P04", "P05", "P06"]:
                    S = "S01"
                else:
                    S = SCENARIO[R]
                for N in repetition:
                    annotator_file = annotator[P]
                    if P == 'P07' and SCENARIO[R] == 'S01':
                        annotator_file = "A03"
                    if P == 'P14' and SCENARIO[R] == 'S03':
                        annotator_file = "A19"
                    if P == 'P11' and SCENARIO[R] == 'S01':
                        annotator_file = "A03"
                    if P == 'P11' and R in ['R04', 'R08', 'R09', 'R10', 'R11', 'R12', 'R13', 'R15']:
                        annotator_file = "A02"

                    file_name_norm = "{}/{}_{}_{}_{}_{}_der_data.csv".format(P, S, P, R, annotator_file, N)

                    try:
                        data = csv_reader.reader_data(FOLDER_PATH + file_name_norm)
                        print("Files loaded")
                    except Exception:
                        print("No file {}".format(FOLDER_PATH + file_name_norm))
                        # Skip this recording; otherwise the statistics below
                        # would reuse stale data from a previous iteration.
                        continue

                    try:
                        print("Getting the max and min")
                        data_t, data_x, data_y = divide_x_y(data)
                        del data_t
                        del data_y

                        max_values = np.max(data_x, axis=0)
                        min_values = np.min(data_x, axis=0)

                        max_values_total = np.max((max_values, max_values_total), axis=0)
                        min_values_total = np.min((min_values, min_values_total), axis=0)

                        accumulator_measurements = np.append(accumulator_measurements, data_x, axis=0)
                        print("Accumulated")
                    except Exception:
                        print("Error computing statistics for {}".format(FOLDER_PATH + file_name_norm))

            mean_values = np.mean(accumulator_measurements, axis=0)
            std_values = np.std(accumulator_measurements, axis=0)
            accumulator_mean_measurements = np.append(accumulator_mean_measurements,
                                                      [mean_values], axis=0)
            accumulator_std_measurements = np.append(accumulator_std_measurements,
                                                     [std_values], axis=0)

    try:
        mean_values = np.mean(accumulator_mean_measurements, axis=0)
        std_values = np.max(accumulator_std_measurements, axis=0)
        mean_values = np.around(mean_values, decimals=4)
        std_values = np.around(std_values, decimals=5)
        print("Max values \n{}".format(max_values_total))
        print("Min values \n{}".format(min_values_total))
        print("Mean values \n{}".format(mean_values))
        print("Std values \n{}".format(std_values))
    except Exception:
        print("Error computing statistics")

    return
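# The derivative pipeline below calls norm_mean_std(data_x), which is defined
# elsewhere in the original script. A minimal sketch, assuming per-channel
# standardization with the mean/std constants printed by the function above;
# MEAN_VALUES_SKETCH / STD_VALUES_SKETCH are hypothetical placeholder names.
MEAN_VALUES_SKETCH = np.zeros(126)  # paste the printed mean values here
STD_VALUES_SKETCH = np.ones(126)    # paste the printed std values here


def norm_mean_std_sketch(data):
    # Zero-mean, unit-variance scaling per channel; a sketch only.
    return (np.asarray(data) - MEAN_VALUES_SKETCH) / STD_VALUES_SKETCH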
def generate_data(ids, sliding_window_length, sliding_window_step, data_dir=None):
    '''
    Creates one pickle file per sequence, where the sequences are extracted from
    a recording following a sliding-window approach. The files are stored on
    disk; nothing is returned.

    @param ids: ids for train, val or test
    @param sliding_window_length: length of window for segmentation
    @param sliding_window_step: step between windows for segmentation
    @param data_dir: path to dir where files will be stored
    '''
    FOLDER_PATH = '/path_to_LARa_Mocap_for_annotations/'
    folder_derivative = "/path_to_LARa_Mocap_for_annotations/"

    # Recording names, referring to the naming of the files in the LARa dataset
    recordings = ['R{:02d}'.format(r) for r in range(1, 31)]

    counter_seq = 0
    hist_classes_all = np.zeros(NUM_CLASSES)

    for P in persons:
        if P not in ids:
            print("\nNo Person in expected IDS {}".format(P))
        else:
            for R in recordings:
                # The number of recordings differs between subjects, so the
                # train/val/test proportions follow the quantity of recordings
                # per subject; see the dataset for the recording files per subject.
                if P in ["S01", "S02", "S03", "S04", "S05", "S06"]:
                    S = "L01"
                else:
                    S = SCENARIO[R]
                for N in repetition:
                    annotator_file = annotator[P]
                    if P == 'S07' and SCENARIO[R] == 'L01':
                        annotator_file = "A03"
                    if P == 'S14' and SCENARIO[R] == 'L03':
                        annotator_file = "A19"
                    if P == 'S11' and SCENARIO[R] == 'L01':
                        annotator_file = "A03"
                    if P == 'S11' and R in ['R04', 'R08', 'R09', 'R10', 'R11', 'R12', 'R13', 'R15']:
                        annotator_file = "A02"
                    if P == 'S13' and R in ['R28']:
                        annotator_file = "A01"
                    if P == 'S13' and R in ['R29', 'R30']:
                        annotator_file = "A11"
                    if P == 'S09' and R in ['R28', 'R29']:
                        annotator_file = "A01"
                    if P == 'S09' and R in ['R21', 'R22', 'R23', 'R24', 'R25']:
                        annotator_file = "A11"

                    file_name_norm = "{}/{}_{}_{}_{}_{}_der_data.csv".format(P, S, P, R, annotator_file, N)
                    file_name_label = "{}/{}_{}_{}_{}_{}_labels.csv".format(P, S, P, R, annotator_file, N)

                    try:
                        # Getting data
                        data = csv_reader.reader_data(folder_derivative + file_name_norm)
                        print("\nFiles loaded")
                    except Exception:
                        print("\n In generating data, No file {}".format(folder_derivative + file_name_norm))
                        continue

                    try:
                        # Getting labels and attributes
                        labels = csv_reader.reader_labels(FOLDER_PATH + file_name_label)
                        class_labels = np.where(labels[:, 0] == 7)[0]
                        print("\nGot labels")

                        # Deleting rows containing the "none" class
                        data = np.delete(data, class_labels, 0)
                        labels = np.delete(labels, class_labels, 0)
                        print("\nDeleted none rows")

                        # Halving the frequency, as the Mbientlab or MotionMiners sensors use 100Hz
                        downsampling = range(0, data.shape[0], 2)
                        data = data[downsampling]
                        labels = labels[downsampling]
                        data_t, data_x, data_y = divide_x_y(data)
                        del data_t
                        print("\nDownsampled")
                    except Exception:
                        print("\n In generating data, Error getting the data {}".format(
                            FOLDER_PATH + file_name_norm))
                        continue

                    try:
                        # Checking if annotations are consistent
                        data_x = norm_mean_std(data_x)
                        if np.sum(data_y == labels[:, 0]) == data_y.shape[0]:
                            # Sliding window approach
                            print("Starting sliding window")
                            X, y, y_all = opp_sliding_window(data_x, labels.astype(int),
                                                             sliding_window_length,
                                                             sliding_window_step,
                                                             label_pos_end=False)
                            print("Windows are extracted")

                            # Statistics
                            hist_classes = np.bincount(y[:, 0], minlength=NUM_CLASSES)
                            hist_classes_all += hist_classes
                            print("Number of seq per class {}".format(hist_classes_all))

                            for f in range(X.shape[0]):
                                try:
                                    sys.stdout.write('\r' + 'Creating sequence file '
                                                     'number {} with id {}'.format(f, counter_seq))
                                    sys.stdout.flush()

                                    seq = np.reshape(X[f], newshape=(1, X.shape[1], X.shape[2]))
                                    # np.float is deprecated; np.float64 keeps the intent.
                                    seq = np.require(seq, dtype=np.float64)

                                    # Storing the sequences; a context manager avoids
                                    # shadowing the loop variable f with the file handle.
                                    obj = {"data": seq, "label": y[f], "labels": y_all[f]}
                                    with open(os.path.join(data_dir,
                                                           'seq_{0:06}.pkl'.format(counter_seq)),
                                              'wb') as fp:
                                        pickle.dump(obj, fp, protocol=pickle.HIGHEST_PROTOCOL)
                                    counter_seq += 1
                                except Exception:
                                    # Raising a bare string is invalid; raise a proper exception.
                                    raise RuntimeError('\nError adding the seq')

                            print("\nCorrect data extraction from {}".format(FOLDER_PATH + file_name_norm))

                            del data
                            del data_x
                            del data_y
                            del X
                            del labels
                            del class_labels
                        else:
                            print("\nInconsistent annotations in {}".format(file_name_norm))
                            continue
                    except Exception:
                        print("\n In generating data, No file {}".format(FOLDER_PATH + file_name_norm))

    return
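# Consumer sketch (not part of the original script): reading one of the stored
# sequence files back. The keys match the obj dictionaries written above;
# "identity" is only present for sequences produced by the MoCap variant of
# generate_data.
def load_sequence(seq_dir, seq_id):
    path = os.path.join(seq_dir, 'seq_{0:06}.pkl'.format(seq_id))
    with open(path, 'rb') as fp:
        obj = pickle.load(fp)
    data = obj["data"]      # shape (1, sliding_window_length, channels)
    label = obj["label"]    # window-level class label
    labels = obj["labels"]  # per-frame annotations within the window
    return data, label, labels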
def generate_derivatives(ids):
    '''
    Generates the files containing the derivatives of the sequences, which will
    be called virtual IMUs. The function stores files with the derivatives of
    the MoCap data for the subjects specified by ids, under the same names as
    the MoCap recordings, keeping the structure of the LARa dataset.

    @param ids: IDs of the subjects for which derivatives will be computed
    '''
    FOLDER_PATH = '/path_to_theLARa_MOCAP_dataset/'
    folder_derivative = "path_to_theLARa_Virtual_dataset/"

    # Recording names, referring to the naming of the files in the LARa dataset
    recordings = ['R{:02d}'.format(r) for r in range(1, 31)]

    for P in persons:
        if P not in ids:
            print("\nNo Person in expected IDS {}".format(P))
        else:
            for R in recordings:
                # All of these if-cases stem from the naming of the recordings in
                # the data: not all subjects have the same annotated recordings,
                # annotators, annotation runs or scenarios. These cases include
                # all of the recordings for the subjects.
                if P in ["S01", "S02", "S03", "S04", "S05", "S06"]:
                    S = "L01"
                else:
                    S = SCENARIO[R]
                for N in repetition:
                    annotator_file = annotator[P]
                    if P == "S07" and SCENARIO[R] == "L01":
                        annotator_file = "A03"
                    if P == "S14" and SCENARIO[R] == "L03":
                        annotator_file = "A19"
                    if P == "S11" and SCENARIO[R] == "L01":
                        annotator_file = "A03"
                    if P == "S11" and R in ["R04", "R08", "R09", "R10", "R11", "R12", "R13", "R15"]:
                        annotator_file = "A02"
                    if P == "S13" and R in ["R28"]:
                        annotator_file = "A01"
                    if P == "S13" and R in ["R29", "R30"]:
                        annotator_file = "A11"
                    if P == "S09" and R in ["R28", "R29"]:
                        annotator_file = "A01"
                    if P == "S09" and R in ["R21", "R22", "R23", "R24", "R25"]:
                        annotator_file = "A11"

                    file_name_norm = "{}/{}_{}_{}_{}_{}_norm_data.csv".format(P, S, P, R, annotator_file, N)
                    file_name_derivative = "{}/{}_{}_{}_{}_{}_der_data.csv".format(P, S, P, R, annotator_file, N)

                    try:
                        # Getting data
                        data = csv_reader.reader_data(FOLDER_PATH + file_name_norm)
                        print("\nFiles loaded")
                        data = select_columns_opp(data)
                        print("Columns selected")
                    except Exception:
                        print("\n In generating data, selecting columns\nNo file {}".format(
                            FOLDER_PATH + file_name_norm))
                        continue

                    try:
                        # Interpolating
                        print("Interpolating")
                        data = interpolate(data)
                    except Exception:
                        print("\n In generating data, Error interpolating the data {}".format(
                            FOLDER_PATH + file_name_norm))
                        continue

                    try:
                        print("\nSaving")
                        save_data_csv(data, folder_derivative + file_name_derivative)
                    except Exception:
                        print("\n In generating data, Error saving the data {}".format(
                            folder_derivative + file_name_derivative))
                        continue

    return
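# generate_derivatives delegates the actual computation to interpolate(data)
# and save_data_csv(...), which are defined elsewhere in the original script.
# The sketch below illustrates one plausible derivative step, under the
# assumptions that column 0 holds the frame/time index, the remaining columns
# are pose channels, and the MoCap sampling rate is known; the function name
# and the 200 Hz default are hypothetical.
def derive_channels_sketch(data, sample_rate=200.0):
    # Central-difference derivative of every pose channel via np.gradient;
    # the time column is passed through unchanged.
    data = np.asarray(data, dtype=np.float64)
    time_col = data[:, :1]
    derivatives = np.gradient(data[:, 1:], 1.0 / sample_rate, axis=0)
    return np.hstack([time_col, derivatives])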