def get_engagement_increase_vs_decrease_timeframes(path, ident, seconds):
    """Return raw data windows around relax-period midpoints, labelled as
    engagement 'increase' (class 1, after the midpoint) or 'decrease'
    (class 0, before the midpoint). TODO: join functions

    :param path: (str) main path to data, where user data is located in specific folders
    :param ident: (str) user identifier
    :param seconds: (int) window length in seconds; the extracted windows are a
        fixed 30 s long, so any value other than 30 makes every ``np.vstack``
        below fail and be skipped — TODO confirm and unify with the window length
    :return: (tasks_data, tasks_y) — stacked raw windows and their 0/1 classes
    """
    dataread = datareader.DataReader(path, ident)  # initialize path to data
    data = dataread.read_grc_data()  # read from files
    # sampling rate inferred from sample count over total duration
    samp_rate = int(round(len(data[1]) / max(data[0])))
    cog_res = dataread.read_cognitive_load_study(str(ident) + '-primary-extract.txt')

    tasks_data = np.empty((0, seconds * samp_rate))
    tasks_y = np.empty((0, 1))

    relax_n = dataread.get_relax_timestamps(return_indexes=True)

    length = int(samp_rate * 30)  # hoisted: window length is loop-invariant

    for i in cog_res['task_number']:
        task_num_table = i - 225  # 0 - 17
        if task_num_table == 0:
            continue  # skip the first relax period
        # midpoint (in samples) of this relax period
        mid = int((relax_n[task_num_table][0] + relax_n[task_num_table][1]) / 2)
        for j in range(10):  # slide the window 10 times, 1 s per step
            new_end = int(mid - j * samp_rate)
            new_start2 = int(mid + j * samp_rate)
            # window ending before the midpoint -> engagement decrease
            dataextract_decrease = dataextractor.DataExtractor(
                data[0][new_end - length:new_end],
                data[1][new_end - length:new_end], samp_rate)
            # window starting after the midpoint -> engagement increase
            dataextract_increase = dataextractor.DataExtractor(
                data[0][new_start2:new_start2 + length],
                data[1][new_start2:new_start2 + length], samp_rate)
            try:
                tasks_data = np.vstack((tasks_data, dataextract_increase.y))
                tasks_y = np.vstack((tasks_y, 1))
                tasks_data = np.vstack((tasks_data, dataextract_decrease.y))
                tasks_y = np.vstack((tasks_y, 0))
            except ValueError:
                print(ident)  # ignore short windows (best-effort, as upstream)
    return tasks_data, tasks_y
def get_busy_vs_relax_timeframes(path, ident, seconds):
    """Returns raw data from either 'on task' or 'relax' time frames and their
    class (0 or 1). TODO: join functions

    :param path: (str) main path to data, where user data is located in specific folders
    :param ident: (str) user identifier
    :param seconds: (int) length (s) of the trailing window kept from each frame
    :return: (samples, classes) — stacked raw windows and their 0/1 labels
    """
    reader = datareader.DataReader(path, ident)  # initialize path to data
    signal = reader.read_grc_data()  # read from files
    # sampling rate inferred from sample count over total duration
    rate = int(round(len(signal[1]) / max(signal[0])))
    study = reader.read_cognitive_load_study(str(ident) + '-primary-extract.txt')

    window = rate * seconds
    samples = np.empty((0, window))
    classes = np.empty((0, 1))

    busy_idx = reader.get_data_task_timestamps(return_indexes=True)
    relax_idx = reader.get_relax_timestamps(return_indexes=True)

    for task_no in study['task_number']:
        row = task_no - 225  # 0 - 17
        b0, b1 = busy_idx[row][0], busy_idx[row][1]
        r0, r1 = relax_idx[row][0], relax_idx[row][1]
        # one sample each: full on-task frame and full relax frame
        on_task = dataextractor.DataExtractor(signal[0][b0:b1], signal[1][b0:b1], rate)
        relaxed = dataextractor.DataExtractor(signal[0][r0:r1], signal[1][r0:r1], rate)
        try:
            samples = np.vstack((samples, on_task.y[-window:]))
            classes = np.vstack((classes, 1))
            samples = np.vstack((samples, relaxed.y[-window:]))
            classes = np.vstack((classes, 0))
        except ValueError:
            print(ident)  # ignore short windows
    return samples, classes
def get_task_complexities_timeframes(path, ident, seconds):
    """Return raw data windows from the end of each 'NC' task along with a task
    complexity class (low=0, high=1; 'medium' tasks are skipped).
    TODO: join functions. Add parameter to choose different task types and complexities

    :param path: (str) main path to data, where user data is located in specific folders
    :param ident: (str) user identifier
    :param seconds: (int) nominal window length in seconds; the extracted windows
        are a fixed 30 s long, so values other than 30 make every ``np.vstack``
        fail and be skipped — TODO confirm and unify with the window length
    :return: (tasks_data, tasks_y) — stacked raw windows and their complexity classes
    """
    dataread = datareader.DataReader(path, ident)  # initialize path to data
    data = dataread.read_grc_data()  # read from files
    # sampling rate inferred from sample count over total duration
    samp_rate = int(round(len(data[1]) / max(data[0])))
    cog_res = dataread.read_cognitive_load_study(str(ident) + '-primary-extract.txt')

    tasks_data = np.empty((0, seconds * samp_rate))
    tasks_y = np.empty((0, 1))

    busy_n = dataread.get_data_task_timestamps(return_indexes=True)

    # hoisted: class mapping is loop-invariant ('medium' is unreachable below,
    # kept for completeness)
    map_compl = {'low': 0, 'medium': 2, 'high': 1}

    for i in cog_res['task_number']:
        task_num_table = i - 225  # 0 - 17
        if cog_res['task_complexity'][task_num_table] == 'medium':
            continue  # only low/high complexity are classified
        if cog_res['task_label'][task_num_table] != 'NC':
            continue  # restrict to the 'NC' task type
        for j in range(10):  # slide a 30 s window back from task end, 1 s per step
            new_end = int(busy_n[task_num_table][1] - j * samp_rate)
            new_start = int(new_end - samp_rate * 30)
            dataextract = dataextractor.DataExtractor(
                data[0][new_start:new_end], data[1][new_start:new_end], samp_rate)
            try:
                tasks_data = np.vstack((tasks_data, dataextract.y))
                tasks_y = np.vstack(
                    (tasks_y, map_compl.get(cog_res['task_complexity'][task_num_table])))
            except ValueError:
                print(ident)  # ignore short windows (best-effort, as upstream)
    return tasks_data, tasks_y
def compare_extracted_hr_and_band(path, ident):
    """Compare heart rates acquired wirelessly and with Microsoft Band.

    Plots both aligned HR series, then scores their agreement.

    :param path: (str) main path to data, where user data is located in specific folders
    :param ident: (str) user identifier
    :return: MAE, MSE, CORRelation values of the aligned HR time series
    """
    dataread = datareader.DataReader(path, ident)  # initialize path to data
    data = dataread.read_grc_data()  # read from files
    data = dataread.unwrap_grc_data()  # unwrap phase. returns time and y values
    samp_rate = round(len(data[1]) / max(data[0]))
    dataextract = dataextractor.DataExtractor(data[0], data[1], samp_rate)
    cog_res = dataread.read_cognitive_load_study(ident + '-primary-extract.txt')
    end_epoch_time = dataread.get_end_time_cognitive_load_study()  # end t

    # NOTE(review): the breathing features below are computed but never used in
    # this comparison; kept because dropping the calls is not verifiably
    # side-effect free — confirm against DataExtractor and remove.
    extracted_br_features = dataextract.raw_windowing_breathing(30, 1)
    extracted_br_features['br_rate'] = np.array(extracted_br_features['br_rate'].rolling(6).mean())
    extracted_br_features_roll_avg = extracted_br_features.loc[:, extracted_br_features.columns != 'times'].rolling(
        6).mean()
    extracted_br_features_roll_avg['times'] = extracted_br_features['times']
    extracted_br_features_roll_avg['br_ok'] = extracted_br_features['br_ok']

    extracted_hr_features = dataextract.raw_windowing_heartrate(10, 1)
    extracted_hr_features = extracted_hr_features.drop(['hr_HRV_lf', 'hr_HRV_hf', 'hr_HRV_lf_hf'], axis=1)
    extracted_hr_features_roll_avg = extracted_hr_features.loc[:, extracted_hr_features.columns != 'times'].rolling(
        10).mean()
    extracted_hr_features_roll_avg['times'] = extracted_hr_features['times']
    extracted_hr_features_roll_avg['hr_ok1'] = extracted_hr_features['hr_ok']

    bandread = bandreader.HeartRateBand(path + '_Hrates/', ident)
    band_data = bandread.load()
    # clip the band stream to the wireless recording's [start, end] epoch-ms range
    band_data_time_start = bisect(band_data[0][:], end_epoch_time - data[0][-1] * 1000)
    band_data_time_stop = bisect(band_data[0][:], end_epoch_time)
    band_data = [band_data[0][band_data_time_start:band_data_time_stop],
                 band_data[1][band_data_time_start:band_data_time_stop]]
    band_data_new_data = [(band_data[0] - band_data[0][0]) / 1000, band_data[1]]  # epoch ms -> relative s

    plt.figure(1)
    plt.clf()
    plt.plot(extracted_hr_features_roll_avg['times'], extracted_hr_features_roll_avg['hr_rate'], color='orange',
             label='Wi-Mind heart rate')
    plt.plot(band_data_new_data[0], band_data_new_data[1], color='green', label='Microsoft Band heart rate')
    plt.xlabel('time (s)')
    plt.ylabel('heart rate')
    plt.legend()
    plt.show()

    # .copy() so the astype assignment below writes to an owned frame instead of
    # a view of extracted_hr_features_roll_avg (avoids SettingWithCopyWarning)
    hr_data = extracted_hr_features_roll_avg[['times', 'hr_rate']].copy()
    hr_data['times'] = hr_data['times'].astype(int)
    band_data = pd.DataFrame()
    band_data['times'] = band_data_new_data[0]
    band_data['times'] = band_data['times'].astype(int)
    band_data['rate'] = band_data_new_data[1]
    band_data = band_data.drop_duplicates(subset=['times'])
    # align both series on whole-second timestamps
    together_data = pd.merge(hr_data, band_data, on='times')
    together_data = together_data.dropna()

    mae = metrics.mean_absolute_error(together_data['rate'], together_data['hr_rate'])
    mse = metrics.mean_squared_error(together_data['rate'], together_data['hr_rate'])
    corr = stats.pearsonr(together_data['rate'], together_data['hr_rate'])
    return mae, mse, corr
def full_signal_extract(path, ident):
    """Extract breathing and heartbeat features from one user and save features to file.

    :param path: (str) main path to data, where user data is located in specific folders
    :param ident: (str) user identifier
    :return: Nothing. It saves features (dataframe) to a .csv file
    """
    dataread = datareader.DataReader(path, ident)  # initialize path to data
    data = dataread.read_grc_data()  # read from files
    data = dataread.unwrap_grc_data()  # unwrap phase. returns time and y values
    samp_rate = round(len(data[1]) / max(data[0]))
    dataextract = dataextractor.DataExtractor(data[0], data[1], samp_rate)
    cog_res = dataread.read_cognitive_load_study(ident + '-primary-extract.txt')
    end_epoch_time = dataread.get_end_time_cognitive_load_study()  # end t

    extracted_br_features = dataextract.raw_windowing_breathing(30, 1)
    extracted_br_features['br_rate'] = np.array(extracted_br_features['br_rate'].rolling(6).mean())
    extracted_br_features_roll_avg = extracted_br_features.loc[:, extracted_br_features.columns != 'times'].rolling(
        6).mean()
    extracted_br_features_roll_avg['times'] = extracted_br_features['times']
    extracted_br_features_roll_avg['br_ok'] = extracted_br_features['br_ok']

    extracted_hr_features = dataextract.raw_windowing_heartrate(10, 1)
    extracted_hr_features = extracted_hr_features.drop(['hr_HRV_lf', 'hr_HRV_hf', 'hr_HRV_lf_hf'], axis=1)
    extracted_hr_features_roll_avg = extracted_hr_features.loc[:, extracted_hr_features.columns != 'times'].rolling(
        10).mean()
    extracted_hr_features_roll_avg['times'] = extracted_hr_features['times']
    extracted_hr_features_roll_avg['hr_ok'] = extracted_hr_features['hr_ok']

    extracted_hr_features2 = dataextract.raw_windowing_heartrate(100, 1)  # longer time to extract HRV frequency feat.
    extracted_hr_features2 = extracted_hr_features2[['hr_HRV_lf', 'hr_HRV_hf', 'hr_HRV_lf_hf', 'times']]
    extracted_hr_features2_roll_avg = extracted_hr_features2.loc[:, extracted_hr_features2.columns != 'times'].rolling(
        10).mean()
    extracted_hr_features2_roll_avg['times'] = extracted_hr_features2['times']

    all_features = extracted_br_features_roll_avg
    all_features = pd.merge(all_features, extracted_hr_features_roll_avg, on='times')
    all_features = pd.merge(all_features, extracted_hr_features2_roll_avg, on='times')

    task_timestamps = dataread.get_data_task_timestamps()
    relax_timestamps = dataread.get_relax_timestamps()

    bandread = bandreader.HeartRateBand(path + '_Hrates/', ident)
    band_data = bandread.load()
    # clip the band stream to the wireless recording's [start, end] epoch-ms range
    band_data_time_start = bisect(band_data[0][:], end_epoch_time - data[0][-1] * 1000)
    band_data_time_stop = bisect(band_data[0][:], end_epoch_time)
    band_data = [band_data[0][band_data_time_start:band_data_time_stop],
                 band_data[1][band_data_time_start:band_data_time_stop]]
    band_data_new_data = [(band_data[0] - band_data[0][0]) / 1000, band_data[1]]  # epoch ms -> relative s

    # .copy() so the astype assignment below does not warn about writing to a view
    hr_data = extracted_hr_features_roll_avg[['times', 'hr_rate']].copy()
    hr_data['times'] = hr_data['times'].astype(int)
    band_data = pd.DataFrame()
    band_data['times'] = band_data_new_data[0]
    band_data['times'] = band_data['times'].astype(int)
    band_data['band_rate'] = band_data_new_data[1]
    band_data = band_data.drop_duplicates(subset=['times'])
    together_data = pd.merge(hr_data, band_data, on='times')
    together_data = together_data.dropna()

    # DataFrame.ix was removed in pandas 1.0; .loc is equivalent here because
    # all_features carries the default RangeIndex produced by pd.merge, so
    # labels coincide with positions (and .loc slices are end-inclusive like .ix).
    # NOTE(review): indexing together_data['band_rate'] by a bisect position is
    # label-based; dropna() above can leave index gaps — confirm it cannot raise.
    for i in range(len(all_features['times'])):
        find_in_hr_data = bisect(together_data['times'], all_features['times'][i])
        all_features.loc[i, 'band_rate'] = together_data['band_rate'][find_in_hr_data]

    # annotate each on-task span with the study metadata and positional indices
    for i in range(len(cog_res)):
        all_feat_ind_task_start = bisect(all_features['times'], task_timestamps[i][0])
        all_feat_ind_task_end = bisect(all_features['times'], task_timestamps[i][1])
        for j in cog_res.columns:
            all_features.loc[all_feat_ind_task_start:all_feat_ind_task_end, j] = cog_res.iloc[i][j]
            # keyboard vs. non-keyboard task, inferred from the task-label codes
            if cog_res.iloc[i][j] == 'GC' or cog_res.iloc[i][j] == 'PT':
                all_features.loc[all_feat_ind_task_start:all_feat_ind_task_end, 'keyboard_task'] = True
            elif cog_res.iloc[i][j] == 'HP' or cog_res.iloc[i][j] == 'FA' or cog_res.iloc[i][j] == 'NC' or \
                    cog_res.iloc[i][j] == 'SX':
                all_features.loc[all_feat_ind_task_start:all_feat_ind_task_end, 'keyboard_task'] = False
        for k in range(all_feat_ind_task_end - all_feat_ind_task_start + 1):
            all_features.loc[k + all_feat_ind_task_start, 'on_task_or_break_index'] = k
        for k in range(all_feat_ind_task_end - all_feat_ind_task_start, -1, -1):
            all_features.loc[all_feat_ind_task_end - k, 'on_task_or_break_index_down'] = k
        all_features.loc[all_feat_ind_task_start:all_feat_ind_task_end, 'on_task'] = True

    # annotate each relax span (plus 30 rows after it) with break indices and
    # an engagement_increase label: False for the first 15 rows, NaN up to 30,
    # True beyond the break's end
    for i in range(len(relax_timestamps)):
        all_feat_ind_task_start = bisect(all_features['times'], relax_timestamps[i][0])
        all_feat_ind_task_end = bisect(all_features['times'], relax_timestamps[i][1])
        new_end = all_feat_ind_task_end + 30
        for k in range(all_feat_ind_task_end - all_feat_ind_task_start + 1):
            all_features.loc[k + all_feat_ind_task_start, 'on_task_or_break_index'] = k
            all_features.loc[k + all_feat_ind_task_start, 'consecutive_break'] = i
        for k in range(new_end - all_feat_ind_task_start + 1):
            all_features.loc[k + all_feat_ind_task_start, 'on_break_and_after_index'] = k
            if k <= 15:
                all_features.loc[k + all_feat_ind_task_start, 'engagement_increase'] = False
            elif k <= 30:
                all_features.loc[k + all_feat_ind_task_start, 'engagement_increase'] = np.nan
            else:
                all_features.loc[k + all_feat_ind_task_start, 'engagement_increase'] = True
        for k in range(all_feat_ind_task_end - all_feat_ind_task_start, -1, -1):
            all_features.loc[all_feat_ind_task_end - k, 'on_task_or_break_index_down'] = k
        all_features.loc[all_feat_ind_task_start:all_feat_ind_task_end, 'on_task'] = False

    all_features['person_id'] = cog_res['person_id'][0]
    all_features.to_csv(path_or_buf=path + ident + '/' + ident + '-data.csv', index=False)
import dataextractor as de
import numpy as np
import matplotlib.pyplot as plt
from sklearn.preprocessing import MinMaxScaler
from sklearn.neural_network import MLPRegressor
from sklearn.model_selection import train_test_split

# Load the track dataset via the project's DataExtractor and convert it to an
# array: rows are tracks, columns are the features listed below, with the
# prediction target in the last column.
extr = de.DataExtractor()
data = extr.load_json().to_array()
print(len(data))       # number of tracks
print(len(data[0]))    # number of columns per track

# Feature column legend (as used throughout this script):
# Acousticness [0]
# Danceability [1]
# Duration [2]
# Energy [3]
# Explicit [4]
# Instrumentalness [5]
# Key [6]
# Liveness [7]
# Loudness [8]
# Mode [9]
# [10] — legend entry lost in the original source (possibly popularity); verify
#        against DataExtractor.load_json
# Speechiness [11]
# Tempo [12]
# Time signature [13]
# Valence [14]

print(data[0])  # sample row for a quick sanity check

# Targets: last column of every row.
# NOTE(review): data[:, -1] requires to_array() to return a NumPy array, not a
# plain list of lists — confirm in dataextractor.
labels = data[:,-1]