def prepare_tags_e4(filepath):
    """Compute HR, HRV and EDA features per tagged snippet and save them.

    Every folder in ``filepath`` whose name starts with "Coaster" is scanned
    for files starting with "Part". Each recording is cut into snippets with
    ``snipping(data, snip_len)``; for every snippet the following feature
    vector is built, in this order:

        HR_avg, HR_max, HR_minmax, NN_avg, SDNN, SDSD, RMSSD,
        pNN20, pNN50, SCR_avg, SCR_n

    The vectors are grouped by tag and written once per participant file
    with ``files.save_dict``.

    Relies on the module-level names ``snip_len``, ``lim_HR``, ``snipping``
    and ``check_finite``.

    Args:
        filepath: Directory that contains the "Coaster*" folders.
    """
    # TODO: integrate saving of tags if they do not exist yet; move to svm.py
    base = files.load('data\\Segments.csv')
    for folder in os.listdir(filepath):
        if not folder.startswith("Coaster"):
            continue
        folderpath = os.path.join(filepath, folder)
        for file in os.listdir(folderpath):
            if not file.startswith("Part"):
                continue
            proc_data = {}
            data = files.load(os.path.join(folderpath, file))
            # snip_time is returned by snipping() but not used here.
            snip_index, snip_time = snipping(data, snip_len)
            part = int(re.search(r'Id(.+?)\.csv', file).group(1))
            # TODO: go back to files.load for this file.
            # NOTE(review): hard-coded absolute path - only works on the
            # original author's machine.
            SCR = pd.io.parsers.read_csv(
                os.path.abspath('C:\\Users\\louwa\\Documents\\Python Master\\Project Files\\data\\peaks\\' + folder + '\\Peaks_Part' + str(part) + ".csv"),
                dtype={'EDA': np.float64},
                index_col=[0],
                header=[0],
                parse_dates=True,
            )
            SCR_max = SCR.EDA.max()
            # Baseline row of this participant (Video == 0).
            baseline = base[(base["Participant"] == part) & (base["Video"] == 0)]
            window_ms = snip_len * 1000

            for tag in snip_index:
                case = snip_index[tag]
                # TODO: move this per-snippet computation somewhere else.
                for case_no in range(len(case)):
                    case_index = case[case_no]
                    case_data = data.loc[case_index]
                    case_SCR = SCR[(SCR.index >= case_index[0]) & (SCR.index <= case_index[-1])]

                    # HR: scaled by the distance between baseline HR and lim_HR.
                    # .item() raises if the baseline selection is not exactly one row.
                    base_hr = baseline["HR_avg"].item()
                    hr_range = lim_HR - base_hr
                    HR_avg = check_finite((case_data["HR"].mean() - base_hr) / hr_range)
                    HR_max = check_finite((case_data["HR"].max() - base_hr) / hr_range)
                    HR_minmax = check_finite((case_data["HR"].max() - case_data["HR"].min()) / hr_range)

                    # HRV: keep only inter-beat intervals inside (-1, 2000).
                    NN = case_data[(case_data["IBI"] > -1) & (case_data["IBI"] < 2000)]
                    NN_avg = check_finite(NN.IBI.mean() / window_ms)
                    SDNN = check_finite(NN.IBI.std() / window_ms)
                    # Absolute successive differences; empty when fewer than
                    # two intervals are available.
                    nn_diff = np.abs(np.diff(np.asarray(NN.IBI.tolist())))
                    SDSD = check_finite(nn_diff.std() / window_ms)
                    RMSSD = check_finite(np.sqrt(np.mean(nn_diff ** 2)) / (window_ms ** 2))
                    try:
                        pNN20 = check_finite(float((nn_diff > 20).sum()) / float(len(nn_diff)))
                    except ZeroDivisionError:
                        pNN20 = 0
                    try:
                        pNN50 = check_finite(float((nn_diff > 50).sum()) / float(len(nn_diff)))
                    except ZeroDivisionError:
                        pNN50 = 0

                    # EDA: mean peak amplitude relative to the file maximum,
                    # and number of peaks scaled by 60 / snip_len.
                    SCR_avg = check_finite(case_SCR.EDA.mean() / SCR_max)
                    SCR_n = check_finite((len(case_SCR) / snip_len) * 60)

                    variables = [HR_avg, HR_max, HR_minmax, NN_avg, SDNN, SDSD,
                                 RMSSD, pNN20, pNN50, SCR_avg, SCR_n]
                    proc_data.setdefault(tag, []).append(variables)

            # TODO: save to the proper place.
            files.save_dict(proc_data, "data\\tags\\" + folder + "\\Tags_Participant_" + str(part))
def segment_e4(filepath, sampleRate, segments):
    """Build one feature row per segment for every "Part*" file and save them.

    Each sub-folder of ``filepath`` is scanned. A file in the folder named
    "Baseline" yields a single row (video 0, segment 0) computed over the
    whole recording. Files in any other folder yield one row per entry of
    ``segments``, where an entry's first and second items are the tags that
    mark the start and end of the segment. Features come from ``fetch_var``.
    The collected rows are saved twice through ``files.save``.

    Args:
        filepath: Directory containing one sub-folder per video/baseline.
        sampleRate: Forwarded unchanged to ``fetch_var``.
        segments: Iterable of (start_tag, end_tag, ...) entries.
    """

    def first_position(table, tag):
        # Positional index of the first row carrying `tag`. get_loc may
        # return a slice (use its start) or a plain position.
        label = table[table["Tag"] == tag].iloc[0].name
        try:
            return table.index.get_loc(label).start
        except AttributeError:
            return table.index.get_loc(label)

    headers = ["Participant", "Video", "Condition", "Segment",
               "HR_avg", "HR_max", "SCR", "SCL"]
    var_list = []

    for folder in os.listdir(filepath):
        folderpath = os.path.join(filepath, folder)
        for file in os.listdir(folderpath):
            if not file.startswith("Part"):
                continue

            data = files.load(os.path.join(folderpath, file))
            condition = 0
            part = int(re.search(r'Id(.+?)\.csv', file).group(1))
            peak_data = files.load(os.path.abspath(r"C:\\Users\\louwa\\Documents\\Python Master\\Project Files\\data\\park\\peaks\\"+folder+"\\Peaks_Part"+str(part)+".csv"))

            if folder == "Baseline":
                # The whole recording is the single baseline segment.
                seg = 0
                video = 0
                HR_avg, HR_max, scr, scl = fetch_var(data, peak_data, sampleRate)
                var_list.append([part, video, condition, seg, HR_avg, HR_max, scr, scl])
                continue

            seg = 1
            video = int(folder[-1])
            if video == 7:
                video = 6

            for i in segments:
                try:
                    start = first_position(data, i[0])
                    end = first_position(data, i[1])
                    print(file, folder)
                    frame = data.iloc[start:end]
                    starttime = data.iloc[start].name
                    endtime = data.iloc[end].name
                    # Peaks whose index lies inside the segment (inclusive).
                    in_segment = (peak_data.index >= starttime) & (peak_data.index <= endtime)
                    scr_peak = peak_data.loc[in_segment]
                    HR_avg, HR_max, scr, scl = fetch_var(frame, scr_peak, sampleRate)
                    var_list.append([part, video, condition, seg, HR_avg, HR_max, scr, scl])
                    seg += 1
                except IndexError:
                    # Tag missing in this file: report it and skip the segment.
                    print(i, part, video)

    files.save(var_list, headers, "Participant", "Segments")
    files.save(var_list, headers, "Participant", "data\\park\\Segments")
def test_segments(filepath):
    """Run ``tests`` on the whole Segments table, then once per video.

    Loads "Segments.csv" from ``filepath`` and calls ``tests`` on the full
    table, followed by the subset of rows for each video number from 1 up to
    the largest value in the "Video" column.

    Args:
        filepath: Directory containing "Segments.csv".
    """
    table = files.load(os.path.join(filepath, "Segments.csv"))

    print("__FULL__")
    tests(table)

    highest_video = int(table["Video"].max())
    for number in range(1, highest_video + 1):
        subset = table[table.Video == number]
        print("__VID " + str(number) + "__")
        tests(subset)
def clean_raw(filepath, Shimmer = True, folder = None):
    """Load tagging information and hand the raw recordings to ``process``.

    Shimmer mode (``Shimmer`` true): every "Tagging*" file in
    ``<filepath>\\static`` is loaded into a dict keyed by the character
    captured after "Tagging ", the "Demographics*" file is loaded, and
    ``process`` is run on ``<filepath>\\raw``.

    Otherwise: works inside ``<filepath>/<folder>``. The "Tagging*" file is
    loaded, every "Motion*" file is tagged with ``apply_tags`` and written
    back in place, and ``process`` is run on the folder with the start found
    in the (last) motion file, or ``None`` for the "Baseline" folder or when
    there is no motion file.

    Args:
        filepath: Study root directory.
        Shimmer: Selects the Shimmer layout (True) or the per-folder layout.
        folder: Sub-folder name; required when ``Shimmer`` is false.

    Raises:
        FileNotFoundError: In Shimmer mode when no "Demographics*" file is
            present in the static directory.
    """
    tags = {}

    if Shimmer:
        demographics = None
        for file in os.listdir(os.path.abspath(filepath + '\\static')):
            if file.startswith("Tagging"):
                tagging = files.load(os.path.abspath(filepath + '\\static\\' + file), index_col=[1], tag=True)
                # The lazy group captures exactly one character after "Tagging ".
                video = re.search('Tagging (.+?)', file).group(1)
                tags[video] = tagging
            elif file.startswith("Demographics"):
                demographics = files.load(os.path.abspath(filepath + '\\static\\' + file), parse_dates=False)

        if demographics is None:
            # Previously this surfaced as an UnboundLocalError below.
            raise FileNotFoundError(
                "No 'Demographics*' file found in " + os.path.abspath(filepath + '\\static')
            )

        filepath = os.path.abspath(filepath + '\\raw')
        process(filepath, Shimmer, folder, tags, demographics=demographics)
        return

    filepath = os.path.join(filepath, folder)
    filelist = os.listdir(filepath)
    for file in filelist:
        if file.startswith("Tagging"):
            tags = files.load(os.path.join(filepath, file), index_col=[1], tag=True)
    print(folder, filelist)

    # Stays None when the folder holds no motion file, so process() falls
    # back to whatever start it finds itself.
    start_Shimmer = None
    for file in filelist:
        if file.startswith("Motion"):
            motion_path = os.path.join(filepath, file)
            motion = files.load(motion_path)
            start_Shimmer = MP.find_start(motion, 16)
            motion = apply_tags(motion, "park", tags, start_Shimmer)
            motion.to_csv(motion_path)

    if folder == "Baseline":
        start_Shimmer = None
    process(filepath, Shimmer, folder, tags, start_Shimmer=start_Shimmer)
def process(filepath, Shimmer, folder, tags, demographics = None, start_Shimmer = None):
    """Derive HR/IBI, classify EDA, extract peaks, tag and save each recording.

    In Shimmer mode every ".csv" file in ``filepath`` is processed; otherwise
    only files whose name starts with "Part". For each file:

    1. the recording is loaded (``files.Shimmer`` or ``files.load``),
    2. HR and IBI columns are computed from the "PPG" column,
    3. rows with HR above 220 or below 40 are set to NaN,
    4. ``EDA_art.classify`` and ``EDA_peak.calcPeakFeatures`` are run,
    5. tags are applied (or an empty "Tag" column is added for "Baseline"),
    6. the result is written to CSV.

    Args:
        filepath: Directory holding the recordings.
        Shimmer: Selects the Shimmer layout (True) or the per-folder layout.
        folder: Folder name; "Baseline" disables start detection and tagging.
        tags: Tagging data forwarded to ``apply_tags``.
        demographics: Table with a "Condition" column indexed by participant
            number; read only in Shimmer mode.
        start_Shimmer: Fallback start used when ``MP.find_start`` returns None.
    """
    for file in os.listdir(filepath):
        if not ((file.endswith(".csv") and Shimmer) or file.startswith("Part")):
            continue

        if Shimmer:
            part = re.search('Session(.+?)_', file).group(1)
            print("Processing Participant " + str(part))
            raw_data, sampleRate = files.Shimmer(os.path.join(filepath, file))
        else:
            part = re.search(r'Id(.+?)\.csv', file).group(1)
            raw_data = files.load(os.path.join(filepath, file))
            sampleRate = 8
            if folder != "Baseline":
                start = MP.find_start(raw_data[["AccelX", "AccelY", "AccelZ"]], sampleRate)
                if start is None:
                    start = start_Shimmer

        # --- Heart Rate ---
        # Original note: if more than 10% of the HR are above 220 or below 40
        # use heartpy for HR+
        print(file, folder)
        hr, ibi = HR.replace(raw_data["PPG"].values, sampleRate)
        raw_data["HR"] = np.asarray(hr)
        raw_data["IBI"] = np.asarray(ibi)
        # NOTE(review): this blanks every column of the affected rows, not
        # only HR - confirm that this is intended.
        raw_data.loc[(raw_data['HR'] > 220) | (raw_data['HR'] < 40)] = np.nan

        # --- EDA ---
        labels, raw_data = EDA_art.classify(raw_data, ["Multiclass"], sampleRate)

        if Shimmer:
            EDA_peak.calcPeakFeatures(raw_data, "data\\vr\\peaks\\Peaks_Part" + str(part) + ".csv", 1, 0, 2, 2, sampleRate)
            condition = demographics["Condition"].loc[int(part)]
            data = apply_tags(raw_data, condition, tags)
            data.to_csv("data\\vr\\clean\\" + '_'.join(["Participant" + str(part), "Condition" + str(int(condition))]) + ".csv")
        else:
            EDA_peak.calcPeakFeatures(raw_data, "data\\park\\peaks\\" + str(folder) + "\\Peaks_Part" + str(part) + ".csv", 1, 0, 2, 2, sampleRate)
            if folder != "Baseline":
                data = apply_tags(raw_data, "park", tags, start)
            else:
                raw_data["Tag"] = ""
                data = raw_data
            # Overwrites the input file with the processed recording.
            data.to_csv(os.path.join(filepath, file))
def distance():
    """Plot each participant's maximum SCR and its offset from baseline SCL.

    Loads "Segments.csv" from the working directory. For every participant
    number from 1 to the largest one present (skipping numbers without rows)
    it records the maximum of the "SCR" column and that maximum minus the
    "SCL" value of the participant's baseline row (Video == 0). The maxima
    are drawn as a normalised histogram; the differences are drawn as a
    scatter plot on a twin y-axis.

    Raises:
        ValueError: If a participant does not have exactly one baseline row
            (raised by ``Series.item``).
    """
    data = files.load("Segments.csv")
    maxval = []
    maxdif = []
    for participant in range(1, int(data["Participant"].max() + 1)):
        rows = data[data["Participant"] == participant]
        if len(rows) == 0:
            continue
        top = rows["SCR"].max()
        baseline = rows[rows["Video"] == 0]
        maxval.append(top)
        maxdif.append(top - baseline["SCL"].item())

    # NOTE(review): distplot is deprecated in recent seaborn releases.
    ax = sns.distplot(maxval, bins=13, color="#feb24c", kde=False, norm_hist=True)
    ax2 = ax.twinx()
    ax.set_ylabel('Density')
    # NOTE(review): the label says HR but the plotted values come from the
    # "SCR" column - confirm which one is intended.
    ax.set_xlabel("Overall maximal HR per participant")
    ax2.set_ylabel('Difference to baseline')
    # x/y must be passed by keyword: newer seaborn treats the first
    # positional argument as `data` and rejects further positionals.
    sns.scatterplot(x=maxval, y=maxdif, ax=ax2, color="#fd8d3c")
    plt.rcParams['figure.figsize'] = (10, 8)
def _tag_row_position(data, tag, occurrence):
    """Return the positional index of the ``occurrence``-th row tagged ``tag``."""
    label = data[data["Tag"] == tag].iloc[occurrence].name
    position = data.index.get_loc(label)
    # get_loc may return a slice (use its start) or a plain position.
    try:
        return position.start
    except AttributeError:
        return position


def _mean_windowed_hr_max(frame, window):
    """Average the HR maxima of consecutive ``window``-row slices of ``frame``."""
    maxima = [frame.iloc[lo:lo + window].HR.max()
              for lo in range(0, len(frame), window)]
    if not maxima:
        return np.nan
    return sum(maxima) / len(maxima)


def segment(filepath, sampleRate, segments, study, Shimmer = True, folder=None):
    """Compute per-segment HR/SCR/SCL features for every "Part*" file and save them.

    ``segments`` is a sequence of entries ``(start_tag, end_tag, count, ...)``.
    For each entry and each repetition ``j < count`` one row
    ``[part, video, condition, seg, HR_avg, HR_max, scr, scl]`` is produced:

    * ``HR_avg``: mean of the "HR" column over the segment,
    * ``HR_max``: mean of the HR maxima of consecutive 10-second windows,
    * ``scr``: number of peaks in the segment scaled by 60 / duration,
    * ``scl``: mean of the "filtered_eda" column.

    A ``start_tag`` of "base" starts the segment at ``start_base`` minutes;
    outside Shimmer mode the baseline frame and peaks are instead loaded from
    the dedicated Baseline files. Rows are saved twice through ``files.save``.
    A file that raises IndexError (e.g. a missing tag) is reported and
    skipped; rows already collected for it are kept.

    Relies on the module-level names ``start_base`` and ``VIDEOS``.

    Args:
        filepath: Study root (Shimmer) or parent of ``folder`` (otherwise).
        sampleRate: Rows per second of the recordings.
        segments: Segment definitions as described above.
        study: Sub-directory of "data" for the second save.
        Shimmer: Selects the Shimmer layout (True) or the per-folder layout.
        folder: Sub-folder name; required when ``Shimmer`` is false.
    """
    tenSec = sampleRate*10
    if Shimmer:
        filelist = os.listdir(os.path.abspath(filepath+'\\clean'))
    else:
        filepath = os.path.join(filepath, folder)
        filelist = os.listdir(filepath)

    headers = ["Participant", "Video", "Condition", "Segment",
               "HR_avg", "HR_max", "SCR", "SCL"]
    var_list = []

    for file in filelist:
        if not file.startswith("Part"):
            continue
        try:
            if Shimmer:
                data = files.load(os.path.abspath(filepath+'\\clean\\'+file))
                condition = int(re.search(r'Condition(.+?)\.csv', file).group(1))
                part = int(re.search('Participant(.+?)_', file).group(1))
                print(part, data.columns)
                peak_data = files.load(os.path.abspath(filepath+'\\peaks\\Peaks_Part'+str(part)+".csv"))
            else:
                data = files.load(os.path.join(filepath, file))
                condition = 0
                part = int(re.search(r'Id(.+?)\.csv', file).group(1))
                part_name = re.search('Part(.+?)_', file).group(1)
                # NOTE(review): hard-coded absolute path - only works on the
                # original author's machine.
                peak_data = files.load(os.path.abspath(r"C:\\Users\\louwa\\Documents\\Python Master\\Project Files\\data\\peaks\\"+folder+"\\Peaks_Part"+str(part)+".csv"))

            for seg, spec in enumerate(segments):
                start_tag = spec[0]
                end_tag = spec[1]
                for j in range(0, spec[2]):
                    if start_tag == "base":
                        video = 0
                        start = start_base*60*sampleRate
                    else:
                        if Shimmer:
                            video = VIDEOS[condition][j]
                        else:
                            video = folder[-1]
                        start = _tag_row_position(data, start_tag, j)
                    end = _tag_row_position(data, end_tag, j)

                    if start_tag != "base" or Shimmer:
                        frame = data.iloc[start:end]
                        starttime = data.iloc[start].name
                        endtime = data.iloc[end].name
                        # Peaks whose index lies inside the segment (inclusive).
                        mask = (peak_data.index >= starttime) & (peak_data.index <= endtime)
                        scr_peak = peak_data.loc[mask]
                        scr = (len(scr_peak) / float(len(frame)/sampleRate)) * 60
                    else:
                        # Non-Shimmer baseline: use the dedicated Baseline files.
                        frame = files.load(os.path.abspath(r'C:\\Users\\louwa\\Documents\\Python Master\\Project Files\\data\\clean\\Baseline\\Part'+part_name+'_Id'+str(part)+".csv"))
                        base_peak = files.load(os.path.abspath(r'C:\\Users\\louwa\\Documents\\Python Master\\Project Files\\data\\peaks\\Baseline\\Peaks_Part'+str(part)+".csv"))
                        scr = (len(base_peak) / float(len(frame)/sampleRate)) * 60

                    HR_avg = frame["HR"].mean()
                    # Only non-empty windows are averaged. Previously a frame
                    # whose length was an exact multiple of the window added an
                    # empty trailing window whose NaN max made HR_max NaN.
                    HR_max = _mean_windowed_hr_max(frame, tenSec)
                    scl = frame['filtered_eda'].mean()
                    var_list.append([part, video, condition, seg, HR_avg, HR_max, scr, scl])
        except IndexError:
            print("Participant", part, "failed")

    files.save(var_list, headers, "Participant", "Segments")
    files.save(var_list, headers, "Participant", "data\\"+study+"\\Segments")