def get_ref_df_RPA(ref_test_files, all_files, cachedir, segement=None):
    """Cache STFT features for every annotated interval of a recording list.

    Each line of ``ref_test_files`` holds five whitespace-separated fields:
    ``<file id> <start sec> <stop sec | END> <class label> <ignored>``.
    The recording whose path (one per line in ``all_files``) contains the
    file id is loaded with ``read_edf_elec``; the interval is cut into
    windows of ``segement`` seconds, each window is checked, ICA-cleaned,
    resampled to 250 Hz, transformed with ``calc_stft`` and saved as
    ``<cachedir>/<file id>_<offset>_<label>_<start>.npy``.

    Args:
        ref_test_files: Path of the annotation list described above.
        all_files: Path of a text file listing one recording path per line.
        cachedir: Directory that receives the ``.npy`` files.
        segement: Window length in seconds. Must be a number in practice;
            the default ``None`` fails as soon as a window length is needed.

    Returns:
        None. Results are written to ``cachedir``; unreadable recordings are
        appended to ``RPA_ECG_wrong_1.txt`` in the working directory.
    """
    print('using segement 12s ', cachedir)
    with open(all_files, 'r') as f:
        all_filenames = f.readlines()
    print(len(all_filenames))

    # Names handed to ica_arti_remove for the first 19 rows of the recording.
    chs = [
        'Fp1', 'Fp2', 'F7', 'F3', 'Fz', 'F4', 'F8', 'T3', 'C3', 'Cz', 'C4',
        'T4', 'T5', 'P3', 'Pz', 'P4', 'T6', 'O1', 'O2'
    ]
    # Hop between consecutive windows in seconds ("dev setting"). The
    # original notes mention 1 s (seizure) / 6 s (other) for a train setting.
    step = 12

    f_prev = None  # id of the recording currently held in `data`
    fsamp = data = None
    with open(ref_test_files, 'r') as f:
        for line in f:
            fields = line.split()
            if not fields:
                # Blank line: nothing to parse.
                continue
            fn, st, sp, cl, _ = fields
            st = float(st)
            to_end = sp == 'END'
            if not to_end:
                sp = float(sp)

            if fn != f_prev:
                matches = [name for name in all_filenames if fn in name]
                print(matches)
                if len(matches) != 1:
                    # Without a unique match there is nothing valid to load;
                    # going on would reuse the previous recording's samples
                    # (or fail on undefined names for the very first line).
                    continue
                fn_full = matches[0].strip()
                print(fn_full)
                try:
                    fsamp, data = read_edf_elec(
                        fn_full, parameters="params_RPA_addECG.txt")
                    print(fsamp, data.shape)
                except Exception:
                    print('can not read', fn_full)
                    with open('RPA_ECG_wrong_1.txt', 'a') as f1:
                        f1.write(fn_full + '\n')
                    # f_prev is left untouched so a later line for the same
                    # recording tries to load it again.
                    continue
                f_prev = fn
            else:
                print('same file')

            # Interval end in samples at the native sampling rate.
            end = data.shape[1] if to_end else sp * fsamp
            window_len = int(fsamp * segement)

            i = 0
            while (st + i) * fsamp + window_len < end:
                start = int((st + i) * fsamp)
                s = data[:19, start:start + window_len]
                # Remaining row(s); presumably the ECG channel added by the
                # parameter file, scaled by 500 -- TODO confirm the scaling.
                s_eeg = data[19:, start:start + window_len] * 500

                # detect if signal is interupted, e.g., all dc, overflow
                if detect_interupted_data(s.transpose(), fsamp):
                    print(
                        'BAD DATA DETECTED! Skipping this {}-second segment due to interupted signals...'
                        .format(segement))
                    i += step
                    continue
                print('GOOD DATA!')

                ica_filt_s = ica_arti_remove(s, fsamp, chs)
                if ica_filt_s is None:
                    print(
                        'Skipping this {}-second segment due to failed ICA...'
                        .format(segement))
                    i += step
                    continue

                # Bring both signals to 250 Hz before the STFT.
                ica_filt_s = resample(
                    ica_filt_s,
                    int(ica_filt_s.shape[1] * 250.0 / fsamp),
                    axis=1)
                s_eeg = resample(
                    s_eeg, int(s_eeg.shape[1] * 250.0 / fsamp), axis=1)

                # The offset is advanced before the file name is built, so
                # the name carries the offset of the *next* window. Kept
                # as-is so existing caches keep their names.
                i += step

                assert s.shape[1] == window_len
                prep_s = calc_stft(ica_filt_s)
                ECG = calc_stft(s_eeg)
                # Join along axis 2, then collapse and restore the leading
                # axis exactly as the original double concatenate did.
                prep_s = np.concatenate(
                    np.concatenate([prep_s, ECG], axis=2))
                prep_s = np.expand_dims(prep_s, axis=0)
                print('stft shape', prep_s.shape)
                prep_fn = '{}/{}_{}_{}_{}.npy'.format(
                    cachedir, fn, i, cl, st)
                print('save to {}'.format(prep_fn))
                assert prep_s.shape == (1, 2 * segement - 1, 20, 125)
                np.save(prep_fn, prep_s)
def get_ref_train_df_TUH(ref_train_file, all_files, cachedir, segement=None):
    """Cache ICA-cleaned STFT features for every annotated interval.

    Each line of ``ref_train_file`` holds five whitespace-separated fields:
    ``<file id> <start sec> <stop sec> <class label> <ignored>``. When
    exactly one path listed in ``all_files`` contains the file id, that
    recording is loaded with ``read_edf_elec``, the interval is cut into
    windows of ``segement`` seconds, and each accepted window is saved as
    ``<cachedir>/<file id>_<offset>_<label>_<start>.npy``.

    Args:
        ref_train_file: Path of the annotation list described above.
        all_files: Path of a text file listing one recording path per line.
        cachedir: Directory that receives the ``.npy`` files.
        segement: Window length in seconds. Must be a number; the default
            ``None`` raises ``TypeError`` immediately.

    Returns:
        None. Results are written to ``cachedir``; unreadable recordings are
        appended to ``dev_ica_EEG_wrong.txt`` in the working directory.
    """
    print('using segement 12s ')
    # int() keeps the value usable as a slice bound for non-integer lengths.
    window_len = int(250 * segement)
    with open(all_files, 'r') as f:
        all_filenames = f.readlines()
    print(len(all_filenames))

    # Channel names handed to ica_arti_remove.
    chs = [
        'Fp1', 'Fp2', 'F7', 'F3', 'Fz', 'F4', 'F8', 'T3', 'C3', 'Cz', 'C4',
        'T4', 'T5', 'P3', 'Pz', 'P4', 'T6', 'O1', 'O2'
    ]
    # Hop between consecutive windows in seconds ("dev setting"). The
    # original notes mention 1 s (seizure) / 6 s (other) for a train setting.
    step = 12

    with open(ref_train_file, 'r') as f:
        for line in f:
            fields = line.split()
            if not fields:
                # Blank line: nothing to parse.
                continue
            fn, st, sp, cl, _ = fields
            st, sp = float(st), float(sp)

            matches = [name for name in all_filenames if fn in name]
            if len(matches) != 1:
                # Missing or ambiguous recording: skip this annotation.
                continue
            fn_full = matches[0].strip()
            print(fn_full)
            try:
                fsamp, data = read_edf_elec(
                    fn_full, parameters="params_TUH_ECG.txt")
            except Exception:
                print('can not read', fn_full)
                with open('dev_ica_EEG_wrong.txt', 'a') as f1:
                    f1.write(fn_full + '\n')
                continue
            print(fsamp, data.shape)

            # resample to 250 if sampling rate is higher
            # NOTE(review): recordings sampled below 250 Hz are left
            # untouched but are still indexed as if at 250 Hz -- confirm.
            if fsamp > 250:
                print('Resampling data from {} to 250 Hz'.format(fsamp))
                data = resample(
                    data, int(data.shape[1] * 250.0 / fsamp), axis=1)

            i = 0
            while (st + i) * 250 + window_len < sp * 250:
                start = int((st + i) * 250)
                s = data[:, start:start + window_len]

                # detect if signal is interupted, e.g., all dc, overflow
                if detect_interupted_data(s.transpose(), 250):
                    print(
                        'BAD DATA DETECTED! Skipping this {}-second segment due to interupted signals...'
                        .format(segement))
                    i += step
                    continue
                print('GOOD DATA!')

                ica_filt_s = ica_arti_remove(s, 250, chs)
                if ica_filt_s is None:
                    print(
                        'Skipping this {}-second segment due to failed ICA...'
                        .format(segement))
                    i += step
                    continue

                # The offset is advanced before the file name is built, so
                # the name carries the offset of the *next* window. Kept
                # as-is so existing caches keep their names.
                i += step

                assert s.shape[1] == window_len
                prep_s = calc_stft(ica_filt_s)
                print('stft shape', prep_s.shape)
                prep_fn = '{}/{}_{}_{}_{}.npy'.format(
                    cachedir, fn, i, cl, st)
                assert prep_s.shape == (1, 2 * segement - 1, 19, 125)
                np.save(prep_fn, prep_s)
def _two_channel_diffs(seg):
    """Return rows (3 - 2) and (13 - 17) of *seg* stacked as a 2-row array."""
    diff1 = seg[3:4, :] - seg[2:3, :]
    diff2 = seg[13:14, :] - seg[17:18, :]
    return np.concatenate((diff1, diff2), axis=0)


def get_ref_train_df(ref_train_file, all_files, cachedir):
    """Cache 5-second two-channel STFT features for every annotated interval.

    Each line of ``ref_train_file`` holds five whitespace-separated fields:
    ``<file id> <start sec> <stop sec> <class label> <ignored>``. When
    exactly one path listed in ``all_files`` contains the file id, that
    recording is loaded with ``read_edf_elec`` and cut into 5-second windows
    (hop of 1 s for label ``"seiz"``, 3 s otherwise). Each window is reduced
    to two difference signals, transformed with ``calc_stft`` and saved as
    ``<cachedir>/<file id>_<offset>_<label>_<start>.npy``. For ``"seiz"``
    intervals two extra windows starting 3 s and 4 s before the interval
    start are saved as well (offsets ``-3`` and ``-4`` in the file name).

    Args:
        ref_train_file: Path of the annotation list described above.
        all_files: Path of a text file listing one recording path per line.
        cachedir: Directory that receives the ``.npy`` files.

    Returns:
        None. Results are written to ``cachedir``.
    """
    window_len = 250 * 5  # 5 seconds
    with open(all_files, 'r') as f:
        all_filenames = f.readlines()
    print(len(all_filenames))

    with open(ref_train_file, 'r') as f:
        for line in f:
            fields = line.split()
            if not fields:
                # Blank line: nothing to parse.
                continue
            fn, st, sp, cl, _ = fields
            st, sp = float(st), float(sp)

            matches = [name for name in all_filenames if fn in name]
            if len(matches) != 1:
                # Missing or ambiguous recording: skip this annotation.
                continue
            fn_full = matches[0].strip()
            print(fn_full)
            fsamp, data = read_edf_elec(fn_full)
            print(fsamp, data.shape)

            # resample to 250 if sampling rate is higher
            if fsamp > 250:
                print('Resampling data from {} to 250 Hz'.format(fsamp))
                data = resample(
                    data, int(data.shape[1] * 250.0 / fsamp), axis=1)

            step = 1 if cl == "seiz" else 3
            i = 0
            while (st + i) * 250 + window_len < sp * 250:
                start = int((st + i) * 250)
                s = _two_channel_diffs(data[:, start:start + window_len])

                # The offset is advanced before the file name is built, so
                # the name carries the offset of the *next* window. Kept
                # as-is so existing caches keep their names.
                i += step

                print('Raw time-series shape', s.shape)
                assert s.shape[1] == window_len
                prep_s = calc_stft(s)
                prep_fn = '{}/{}_{}_{}_{}.npy'.format(
                    cachedir, fn, i, cl, st)
                print('Preprocessed shape', prep_s.shape)
                assert prep_s.shape == (1, 9, 2, 125)
                np.save(prep_fn, prep_s)

            # "Previous" signals for seizure data: windows that begin 3 s
            # and 4 s before the annotated start and therefore overlap it.
            if cl == "seiz":
                for i_a in range(2):
                    lead = st - i_a - 3
                    if lead < 0:
                        continue
                    start = int(lead * 250)
                    s = _two_channel_diffs(
                        data[:, start:start + window_len])
                    print('Additional raw time-series shape', lead, s.shape,
                          start, start + window_len)
                    # A short slice means the recording ended early; such
                    # windows are dropped instead of failing.
                    if s.shape[1] == window_len:
                        prep_s = calc_stft(s)
                        prep_fn = '{}/{}_{}_{}_{}.npy'.format(
                            cachedir, fn, -i_a - 3, cl, st)
                        assert prep_s.shape == (1, 9, 2, 125)
                        print('Additional preprocessed shape', prep_s.shape)
                        np.save(prep_fn, prep_s)