def Read_Preproc_FeatExtract(self):
    """Read each EDF recording, epoch it and extract features per subject.

    For every path in the module-level ``data_files_list`` (paired by index
    with ``hypno_files_list``) the method:

    1. reads the EDF file with MNE and takes the sampling rate from its
       header (stored in ``self.fs``),
    2. trims the recording to a whole number of 30 s epochs and reshapes it
       to ``[n_channels, len_epoch, n_epochs]``,
    3. subtracts the reference channel (``TP10``) from the main channel
       (``C3``),
    4. loads the tab-delimited hypnogram, removes non-scored epochs and
       one-hot encodes the labels through a ``ssccoorriinngg`` object,
    5. extracts features for each selected channel and stacks them
       column-wise into ``self.Feat_all_channels``.

    Results are written to the module-level dictionaries ``subjects_dic``,
    ``hyp_dic`` and ``dic_pciked_chans`` (all reset at the start of the
    call). Keys are built from ``c_subj[-11:-4]``, i.e. a fixed slice of the
    file path that is used as the subject identifier.

    Progress is printed to stdout and shown in a new ``Toplevel`` log
    window; ``self.log1_`` ... ``self.log5_`` hold the most recent Label
    widget of each kind.

    Raises:
        ValueError: if a recording lacks the main channel or the reference
            channel, since no referenced signal can be computed then.
    """
    global subjects_dic, hyp_dic, dic_pciked_chans
    subjects_dic = {}
    hyp_dic = {}
    dic_pciked_chans = {}

    # Start of the whole run; used for the total-duration message at the end.
    tic_tot = time.time()

    #### ======================= Create log window ====================####
    self.log_win = Toplevel()
    self.log_win.title("Log file of current processes")

    # Label
    self.label = Label(self.log_win, text="Process log file:",
                       font='Helvetica 12 bold')
    self.label.pack()

    self.close_log_win = Button(self.log_win, text="Dismiss",
                                command=self.log_win.destroy)
    self.close_log_win.pack()

    def _log(text):
        # pack() returns None, so the widget must be created first and
        # packed separately if a reference to it is to be kept.
        label = Label(self.log_win, text=text)
        label.pack()
        return label

    # 1. The channels that need to be referenced
    Mastoids = ['TP10']        # Reference electrodes
    RequiredChannels = ['C3']  # main electrodes
    # 2. Channels that don't need to be referenced: --> Deactive

    T = 30  # epoch length in seconds

    #### ======================= Read data files ======================####
    for idx, c_subj in enumerate(data_files_list):
        subj_label = c_subj[-11:-4]

        self.log1_ = _log("Analyzing data: " + str(subj_label) +
                          "\tPlease wait ...")
        print(f'Analyzing data: {subj_label}')

        ## Read in data
        tic = time.time()
        data = mne.io.read_raw_edf(c_subj)

        # Data raw EEG --> Deactive
        # data.plot(duration = 30, highpass = .3 , lowpass = 25 )
        raw_data = data.get_data()
        read_secs = time.time() - tic
        print('Time to read EDF: {}'.format(read_secs))
        self.log2_ = _log("Time to read EDF data (s): " +
                          str(np.round(read_secs)))

        #####=================Retrieving information from data====================#####
        DataInfo = data.info
        AvailableChannels = DataInfo['ch_names']
        self.fs = int(DataInfo['sfreq'])

        #####================= Find index of required channels ===================#####
        Idx = []
        Idx_Mastoids = []
        for indx, c in enumerate(AvailableChannels):
            if c in RequiredChannels:
                Idx.append(indx)
            elif c in Mastoids:
                Idx_Mastoids.append(indx)

        # Without both a main and a reference channel the subtraction below
        # silently yields an empty array and the failure only shows up later
        # as an unrelated error, so stop here with a clear message.
        if not Idx or not Idx_Mastoids:
            raise ValueError(
                f'Recording {c_subj} must contain the channels '
                f'{RequiredChannels} and {Mastoids}; '
                f'available channels: {AvailableChannels}')

        #####===== One epoch (T seconds) is fs * T samples ======================#####
        len_epoch = self.fs * T
        n_channels = len(AvailableChannels)

        #####============ Cut tail; use modulo to find full epochs ===============#####
        n_samples = raw_data.shape[1] - raw_data.shape[1] % len_epoch
        raw_data = raw_data[:, 0:n_samples]

        #####========== Reshape data [n_channel, len_epoch, n_epochs] ============#####
        # Fortran order keeps consecutive samples of a channel together
        # inside an epoch: element (c, s, e) is raw_data[c, e*len_epoch + s].
        data_epoched = np.reshape(
            raw_data, (n_channels, len_epoch, n_samples // len_epoch),
            order='F')

        #####===================== Reading hypnogram data ========================#####
        hyp = loadtxt(hypno_files_list[idx], delimiter="\t")

        tic = time.time()

        #####================= Concatenation of selected channels ================#####
        # Calculate referenced channels:
        data_epoched_selected = data_epoched[Idx] - data_epoched[Idx_Mastoids]

        #####================= Find order of the selected channels ===============#####
        picked_channels = []
        picked_refs = []
        List_Channels = []

        # Find main channels
        for kk in Idx:
            picked_channels = np.append(picked_channels, AvailableChannels[kk])

        # Find references
        for kk in Idx_Mastoids:
            picked_refs = np.append(picked_refs, AvailableChannels[kk])

        print(
            f'subject LK {c_subj} --> detected channels: {str(picked_channels)} - {str(picked_refs)}'
        )
        self.log3_ = _log("Dectected channels:" + str(picked_channels) +
                          "-" + str(picked_refs))

        # Create list of channel labels of the form "<main>-<reference>"
        for kk in range(len(Idx)):
            List_Channels = np.append(
                List_Channels, picked_channels[kk] + '-' + picked_refs[kk])

        #%% Analysis section
        #####================= remove channels without scoring ===================#####
        # assign the proper data and labels
        x_tmp_init = data_epoched_selected
        y_tmp_init = hyp

        # Define ssccoorriinngg object:
        self.Object = ssccoorriinngg(filename='', channel='', fs=self.fs, T=T)

        # Ensure equality of length for arrays:
        self.Object.Ensure_data_label_length(x_tmp_init, y_tmp_init)

        # Remove non-scored epochs
        x_tmp, y_tmp = self.Object.remove_channels_without_scoring(
            hypno_labels=y_tmp_init, input_feats=x_tmp_init)

        # Remove disconnections (disabled in the original code):
        # x_tmp, y_tmp = self.Object.remove_disconnection(hypno_labels= y_tmp,
        #                                                 input_feats=x_tmp)

        #####============= Create a one hot encoding form of labels ==============#####
        # Create binary labels array
        self.yy = self.Object.One_hot_encoding(y_tmp)

        # Ensure all the input labels have a class
        self.Object.Unlabaled_rows_detector(self.yy)

        #%% Function: Feature_Extraction
        # Initialize feature array: one row per epoch, no columns yet
        self.Feat_all_channels = np.empty((np.shape(x_tmp)[-1], 0))

        #####================== Extract the relevant features ====================#####
        for k in range(np.shape(data_epoched_selected)[0]):
            feat_temp = self.Object.FeatureExtraction_per_subject(
                Input_data=x_tmp[k, :, :])
            self.Feat_all_channels = np.column_stack(
                (self.Feat_all_channels, feat_temp))

        toc = time.time()
        print(
            f'Features of subject { subj_label} were successfully extracted in: {toc-tic} secs'
        )
        self.log4_ = _log("Features of subject" + str(subj_label) +
                          " were successfully extracted in (secs):" +
                          str(np.round(toc - tic)))

        # Double check the equality of size of arrays
        self.Object.Ensure_feature_label_length(self.Feat_all_channels,
                                                self.yy)

        # Defining dictionary to save features PER SUBJECT
        subjects_dic["subject{}".format(subj_label)] = self.Feat_all_channels

        # Defining dictionary to save hypnogram PER SUBJECT
        hyp_dic["hyp{}".format(subj_label)] = self.yy

        # Show picked channels per subject
        dic_pciked_chans["subj{}".format(subj_label)] = List_Channels

        #####=============== Removing variables for next iteration ===============#####
        del x_tmp, y_tmp, feat_temp

        # The f prefix was missing, so the placeholder was printed literally.
        print(f'Feature extraction of subject { subj_label} has been finished.')
        self.log5_ = _log("Feature extraction of subject " +
                          str(subj_label) + " has been finished.")

    # Elapsed time is "now - start"; the original (un-interpolated) message
    # had the operands reversed and referenced an undefined tic_tot.
    print(
        f'Total feature extraction of subjects took {time.time() - tic_tot} secs.'
    )
# Base directory; every other location below is derived from it.
Main_path = "P:/3013080.01/"
subject_Id_folder = f"{Main_path}Autoscoring/ssccoorriinngg/"
Data_folder = f"{Main_path}Zmax_Data/"
Hypnogram_folder = f"{Main_path}somno_scorings/Rathiga/"

#####===================== Reading EDF data files=========================#####
# Load the subject IDs as strings from the text file, using a newline as the
# field delimiter.
# NOTE(review): recent NumPy releases may reject a newline delimiter in
# loadtxt -- confirm against the NumPy version in use.
subject_ids = loadtxt(
    f"{subject_Id_folder}Zmax/Subject_ids_excluding 22_2.txt",
    dtype='str',
    delimiter='\n',
)

#####============= create an object of ssccoorriinngg class ==============#####
Object = ssccoorriinngg(filename='', channel='', fs=256, T=30)

# =============================================================================
# Disabled code, kept as in the original file:
#
# # Initialization
# subjects_dic = {}
# hyp_dic = {}
# metrics_per_fold = {}
# raw_data_dic = {}
# tic_tot = time.time()
#
# # Ignore unnecessary warnings
# np.seterr(divide='ignore', invalid='ignore')
#
# #####============= Iterate through each subject to find data =============#####
#
# for idx, c_subj in enumerate(subject_ids):
# Split the entries of ``gp`` into control and patient subject IDs.
# Each entry is indexed as (subject number, group code).
subj_c = []  # Control
subj_p = []  # Patients

# Find control subject IDs
for indx, c in enumerate(gp):
    if c[1] == 'C':
        subj_c.append(int(c[0]))
    elif c[1] == 'CC':
        # Entries coded 'CC' are left out of both groups.
        pass
    else:
        subj_p.append(int(c[0]))

# Initialization
subjects_dic = {}
hyp_dic = {}
metrics_per_fold = {}

# create an object of ssccoorriinngg class
Object = ssccoorriinngg(filename='', channel='fp1-fp2', fs=200, T=30)

#%% Read data per subject and assign it to relevant array
for idx, c_subj in enumerate(subj_c):

    print(f'Analyzing Subject Number: {c_subj}')

    tic = time.time()

    path = 'D:/1D_TimeSeries/raw_EEG/full/Fp1-Fp2/'

    with h5py.File(path + 'LK_' + str(c_subj) + '_1.h5', 'r') as rf:
        # ``Dataset.value`` was removed in h5py 3.0; indexing with an empty
        # tuple reads the whole dataset into memory and works on every
        # h5py version.
        x_tmp_init = rf['.']['data_fp1-fp2'][()]
        y_tmp_init = rf['.']['hypnogram'][()]

    print(f'Featrueset and hypno of subject {c_subj} was successfully loaded.')

    # Remove bad signals (hyp == -1)
    x_tmp, y_tmp = Object.remove_bad_signals(hypno_labels=y_tmp_init,
                                             input_feats=x_tmp_init)