def compute_basisvector(cjn, name, N):
    """
    Compute the basis vectors of the orthonormal basis
    corresponding to the list cj,n

    Input:
        type cjn = list of length j
        cj,n = 0 if the filter is g / 1 if the filter is h
        type name = string
        name = Name of the wavelet filter
        type N = integer
        N = Length of the time series (multiple of 2**j)
    Output:
        type C = N * N / 2**j numpy array
        C = N / 2**j basis vectors of length N
    """
    g = DWT.get_scaling(name)
    h = DWT.get_wavelet(g)
    J = len(cjn)
    C = np.identity(int(N / (2 ** J)))
    for j in range(J, 0, -1):
        if (cjn[j - 1] == 0):
            Cj = DWT.compute_AB(g, j, N)
        else:
            Cj = DWT.compute_AB(h, j, N)
        C = np.matmul(np.transpose(Cj), C)
    return C
def get_DWPT(X, name, J):
    """
    Compute the DWPT of X up to level J

    Input:
        type X = 1D numpy array
        X = Time series which length is a multiple of 2**J
        type name = string
        name = Name of the wavelet filter
        type J = integer
        J = Level of partial DWPT
    Output:
        type W = list of J+1 1D numpy arrays
        W = Vectors of DWPT coefficients at levels 0, ..., J
    """
    assert (type(J) == int), \
        'Level of DWPT must be an integer'
    assert (J >= 1), \
        'Level of DWPT must be higher or equal to 1'
    N = np.shape(X)[0]
    assert (N % (2 ** J) == 0), \
        'Length of time series is not a multiple of 2**J'
    g = DWT.get_scaling(name)
    h = DWT.get_wavelet(g)
    W = [X]
    for j in range(1, J + 1):
        Wjm1 = W[-1]
        Wj = get_Wj(h, g, j, Wjm1)
        W.append(Wj)
    return W
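# --- Illustrative usage sketch (not part of the original module) ---
# A minimal check of get_DWPT and compute_basisvector together, assuming the
# surrounding module provides the DWT helper module and get_Wj used above.
# The filter name 'LA8' and the sizes below are placeholder assumptions.
import numpy as np

rng = np.random.default_rng(0)
X = rng.standard_normal(512)            # length 512 is a multiple of 2**3

W = get_DWPT(X, 'LA8', 3)               # W[0] is X itself, W[1..3] are packet levels
print(len(W))                           # expected: 4

# Basis vectors for one node of the level-3 packet tree; cjn[j-1] picks
# the scaling filter g (0) or the wavelet filter h (1) at each level.
C = compute_basisvector([0, 1, 0], 'LA8', 512)
print(C.shape)                          # expected per the docstring: (512, 64)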
def create_prob_functions(DWT):
    # creating the 5 sets of EEG signals
    # eyes open
    rootdir = "C:/Users/Nathan Joseph/Desktop/CPEG498/SortedData/eyes open"
    setA = createSets(rootdir)
    # eyes closed
    rootdir = "C:/Users/Nathan Joseph/Desktop/CPEG498/SortedData/eyes closed"
    setB = createSets(rootdir)
    # same side as seizure
    rootdir = "C:/Users/Nathan Joseph/Desktop/CPEG498/SortedData/epileptic hemisphere"
    setC = createSets(rootdir)
    # opposite side of seizure
    rootdir = "C:/Users/Nathan Joseph/Desktop/CPEG498/SortedData/opposite hemisphere"
    setD = createSets(rootdir)
    # actual seizure
    rootdir = "C:/Users/Nathan Joseph/Desktop/CPEG498/SortedData/Seizure"
    setE = createSets(rootdir)

    # creating coefficients
    testA_coeffs = DWT.getCoeffecients(setA)
    testB_coeffs = DWT.getCoeffecients(setB)
    testC_coeffs = DWT.getCoeffecients(setC)
    testD_coeffs = DWT.getCoeffecients(setD)
    testE_coeffs = DWT.getCoeffecients(setE)

    approximate_coeff = []
    # Rows are appended in the order A, B, C, D, E for each segment below,
    # so the labels must repeat that pattern (E, the seizure class, is 1).
    outputSet = [0, 0, 0, 0, 1] * 100

    # creating the rows in the dataframe
    for setLen in range(0, 100):
        # approximate
        approximate_coeff.append(testA_coeffs[setLen][0])
        approximate_coeff.append(testB_coeffs[setLen][0])
        approximate_coeff.append(testC_coeffs[setLen][0])
        approximate_coeff.append(testD_coeffs[setLen][0])
        approximate_coeff.append(testE_coeffs[setLen][0])
        # d2
        """
        d2coef.append(testA_coeffs[setLen][1])
        d2coef.append(testB_coeffs[setLen][1])
        d2coef.append(testC_coeffs[setLen][1])
        d2coef.append(testD_coeffs[setLen][1])
        d2coef.append(testE_coeffs[setLen][1])
        # d1
        d1coef.append(testA_coeffs[setLen][2])
        d1coef.append(testB_coeffs[setLen][2])
        d1coef.append(testC_coeffs[setLen][2])
        d1coef.append(testD_coeffs[setLen][2])
        d1coef.append(testE_coeffs[setLen][2])
        """

    total_df = pd.DataFrame(data=approximate_coeff)
    # total_df['Approximate'] = approximate_coeff
    # total_df['D2'] = d2coef
    # total_df['D1'] = d1coef
    total_df['Output'] = outputSet
    # total_df = total_df.sample(frac=1).reset_index(drop=True)
    # cprint(total_df)
    return total_df
def plot_worker(frame):
    '''
    Calculate ch0 and ch1 filtered data. Then calculate feature
    vectors according to the method selected.

    Raises:
        KeyError -- Error raised if feat_method is set wrong in the config dict
    '''
    global ch0_line, ch1_line, ch0_line_gaussed, ch1_line_gaussed, ch0_fft_line, ch1_fft_line, ch0_grad_line
    global ch0_list, ch1_list
    global x_list, new_feature
    global feat0, feat1, filtered_ch0, filtered_ch1

    if not paused:
        ch0_line.set_data(x_list, ch0_list)
        ch1_line.set_data(x_list, ch1_list)

        # Gaussian filtering
        # (scipy.ndimage.filters is deprecated; call gaussian_filter1d directly)
        gauss_inp_ch0 = np.array(ch0_list)
        filtered_ch0 = scipy.ndimage.gaussian_filter1d(gauss_inp_ch0, sigma=config['ch0_gauss_filter_sigma'])
        gauss_inp_ch1 = np.array(ch1_list)
        filtered_ch1 = scipy.ndimage.gaussian_filter1d(gauss_inp_ch1, sigma=config['ch1_gauss_filter_sigma'])
        ch0_line_gaussed.set_data(x_list_np, filtered_ch0)
        ch1_line_gaussed.set_data(x_list_np, filtered_ch1)

        # ==========================================================================================
        # # fft plot
        # N = np.arange(config['x_window'])
        # fft0 = np.fft.fft(filtered_ch0)
        # fft1 = np.fft.fft(filtered_ch1)
        # freq = np.fft.fftfreq(config['x_window'], d=(config['sample_time_period']/1000))*2000
        # ch0_fft_line.set_data(freq, fft0.real)
        # ch1_fft_line.set_data(freq, fft1.real)
        # ==========================================================================================

        if config['feat_method'] == 'psd':
            # PSD extract
            feat0 = PSD.PSD_extractor(ch0_list)
            feat1 = PSD.PSD_extractor(ch1_list)
            N = np.arange(config['psd_feature_size'] * 3)
            ch0_fft_line.set_data(N, np.array(feat0))
            ch1_fft_line.set_data(N, np.array(feat1))
        # ==========================================================================================
        elif config['feat_method'] == 'dwt':
            # DWT extract
            feat0 = DWT.DWT_extractor(ch0_list)
            feat1 = DWT.DWT_extractor(ch1_list)
            N = np.arange(len(feat0))
            ch0_fft_line.set_data(N, np.array(feat0))
            ch1_fft_line.set_data(N, np.array(feat1))
        # ==========================================================================================
        else:
            raise KeyError("invalid feat method")

        new_feature = True

        if config['compute_concentration_energy']:
            concentration_energy = np.trapz(feat0[20:30], dx=1)  # /np.trapz(feat0[8:12], dx=1)
            print(concentration_energy)

    time.sleep(config['sample_time_period'] / 1000)
    return ch0_line, ch1_line, ch0_line_gaussed, ch1_line_gaussed, ch0_fft_line, ch1_fft_line
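# --- Illustrative sketch (not part of the original plot worker) ---
# The Gaussian smoothing step above in isolation, using only NumPy and SciPy;
# the sigma value is a placeholder assumption, not the project's config value.
import numpy as np
from scipy.ndimage import gaussian_filter1d

t = np.linspace(0.0, 1.0, 500)
noisy = np.sin(2 * np.pi * 5.0 * t) + 0.3 * np.random.randn(500)

smoothed = gaussian_filter1d(noisy, sigma=4.0)
print(noisy.std(), smoothed.std())      # smoothing reduces the variance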
def interpolate(L):
    # Upsample the low-pass band by synthesizing with zeroed detail bands
    LH = np.zeros(shape=L.shape, dtype=np.float64)
    HL = np.zeros(shape=L.shape, dtype=np.float64)
    HH = np.zeros(shape=L.shape, dtype=np.float64)
    H = (LH, HL, HH)
    _L_ = DWT.synthesize(L, H)
    return _L_
def get_wavelet(g):
    """
    Return the coefficients of the MODWT wavelet filter

    Input:
        type g = 1D numpy array
        g = Vector of coefficients of the MODWT scaling filter
    Output:
        type h = 1D numpy array
        h = Vector of coefficients of the MODWT wavelet filter
    """
    h = DWT.get_wavelet(g)
    return h
def pre_process(data):
    # discrete wavelet transform
    wavelets = DWT.getCoeffecients(data)

    # the different wavelet coefficient levels
    approximate_coeff = []
    d2coef = []
    d1coef = []
    for i in range(0, 17):
        # approximate
        for a in wavelets[i][0]:
            approximate_coeff.append(a)
        # d2
        for da2 in wavelets[i][1]:
            d2coef.append(da2)
        # d1
        for da1 in wavelets[i][2]:
            d1coef.append(da1)

    # k-means clustering
    A2distribution = kMeans.runModelTest(approximate_coeff, 62, "A2.pckl")
    D2distribution = kMeans.runModelTest(d2coef, 62, "D2.pckl")
    D1distribution = kMeans.runModelTest(d1coef, 122, "D1.pckl")

    # probability distribution
    dataset = pd.DataFrame(np.zeros((17, 18)))
    for subband in range(len(A2distribution)):
        dataset.loc[subband] = (A2distribution[subband] + D2distribution[subband]
                                + D1distribution[subband])
    dataset.columns = [
        "A2k1", "A2k2", "A2k3", "A2k4", "A2k5", "A2k6",
        "D2k1", "D2k2", "D2k3", "D2k4", "D2k5", "D2k6",
        "D1k1", "D1k2", "D1k3", "D1k4", "D1k5", "D1k6",
    ]
    print(dataset)
    return dataset
def get_scaling(name):
    """
    Return the coefficients of the MODWT scaling filter

    Input:
        type name = string
        name = Name of the wavelet filter
    Output:
        type g = 1D numpy array
        g = Vector of coefficients of the MODWT scaling filter
    """
    g = DWT.get_scaling(name)
    # The MODWT filter is the corresponding DWT filter rescaled by 1/sqrt(2)
    g = g / sqrt(2.0)
    return g
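# --- Illustrative check (not part of the original module) ---
# With the Haar filter, the DWT-to-MODWT conversion above is just a 1/sqrt(2)
# rescaling, and the wavelet filter follows from the scaling filter through
# the quadrature mirror relation h[l] = (-1)**l * g[L-1-l].
import numpy as np

g_dwt = np.array([1.0, 1.0]) / np.sqrt(2.0)    # DWT Haar scaling filter
g_modwt = g_dwt / np.sqrt(2.0)                 # MODWT version: [0.5, 0.5]

L = len(g_modwt)
h_modwt = np.array([(-1) ** l * g_modwt[L - 1 - l] for l in range(L)])
print(g_modwt, h_modwt)                        # [0.5 0.5] [ 0.5 -0.5]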
"D1k3", "K2k4", "D1k5", "D1k6", ] print(dataset) return dataset def testResults(subband, output): newDataSet = pre_process(subband) newModel = mlpnn.predictModel("my_model.h5", newDataSet) if output == '1': return newModel / len(subband) else: return (len(subband) - newModel) / len(subband) DWT = DWT.DWT() kMeans = kMeans.kMeans() mlpnn = mlpnn.MLPNN() testing = open( r"C:\Users\Nathan Joseph\Desktop\CPEG498\SortedData\eyes closed\O001.txt") testing = testing.read().split('\n') testing.pop() testing = np.array(testing) testing = np.split(testing, 17) print(len(testing)) print(testResults(testing, '1'))
def reduce(_L_):
    # Keep only the low-pass band of a one-level analysis
    L, _ = DWT.analyze(_L_)
    return L
def compute_misfit(isource, j):
    """Function to compute the misfit

    Input:
        isource = index of the source
        j = scale at which we run the inversion process
    """
    namedir1 = 'Source_' + str(isource + 1)
    os.chdir(namedir1)

    dirname_d = '../../Data/'
    dirname_s = 'OUTPUT_FILES/'
    filename_d = 'data_shot' + str(isource + 1) + '.bin'

    t_d = numpy.arange(0.0, nt_d * dt_d, dt_d)
    t_s = numpy.arange(0.0, nt_s * dt_s, dt_s)
    t_ref = numpy.arange(0.0, nt_ref * dt_ref, dt_ref)

    fv_d = numpy.fromfile(dirname_d + filename_d, 'f4')
    v_d = numpy.reshape(fv_d, (nrec, nt_d))

    misfit = 0.0
    adj = numpy.zeros(nt_ref)
    for irec in range(rstart - 1, rend):
        d = v_d[irec, :]
        # receiver name zero-padded to four digits
        if irec + 1 < 10:
            prefix = 'AA.S000' + str(irec + 1)
        elif irec + 1 < 100:
            prefix = 'AA.S00' + str(irec + 1)
        else:
            prefix = 'AA.S0' + str(irec + 1)
        # filename_s = prefix + '.BXZ.semd'
        filename_s = prefix + '.PRE.semp'
        fv_s = numpy.loadtxt(dirname_s + filename_s)
        s = f * fv_s[:, 1]
        istart = int((tstart + irec * 25.0 * sstart) / dt_ref)
        iend = int((tend + irec * 25.0 * send) / dt_ref)
        # interpolate data and synthetics onto the reference time grid
        f_d = interpolate.interp1d(t_d, d)
        f_s = interpolate.interp1d(t_s, s)
        d_inter = f_d(t_ref)
        s_inter = f_s(t_ref)
        # wavelet transform of data and synthetics at scale j
        (d_WT, NA_d) = DWT.WT(d_inter, nt_ref, j)
        (s_WT, NA_s) = DWT.WT(s_inter, nt_ref, j)
        # accumulate the misfit and build the adjoint source inside the window
        # (loop bound is nt_ref, matching the length of adj and the transforms)
        for it in range(0, nt_ref):
            if it >= istart and it <= iend:
                misfit += 0.5 * numpy.power(s_WT[it] - d_WT[it], 2.0)
                adj[it] = s_WT[it] - d_WT[it]
        f_a = interpolate.interp1d(t_ref, adj)
        adj_int = f_a(t_s)
        # filename_x = 'SEM/' + prefix + '.BXX.adj'
        # filename_y = 'SEM/' + prefix + '.BXY.adj'
        # filename_z = 'SEM/' + prefix + '.BXZ.adj'
        # filename_p = 'SEM/' + prefix + '.PRE.adj'
        filename_p = 'SEM/' + prefix + '.POT.adj'
        # source_x = numpy.ndarray((nt_s, 2))
        # source_y = numpy.ndarray((nt_s, 2))
        # source_z = numpy.ndarray((nt_s, 2))
        source_p = numpy.ndarray((nt_s, 2))
        # source_x[:, 0] = t_s
        # source_y[:, 0] = t_s
        # source_z[:, 0] = t_s
        source_p[:, 0] = t_s
        # source_x[:, 1] = adj_int
        # source_y[:, 1] = numpy.zeros(nt_s)
        # source_z[:, 1] = numpy.zeros(nt_s)
        source_p[:, 1] = adj_int
        # numpy.savetxt(filename_x, source_x)
        # numpy.savetxt(filename_y, source_y)
        # numpy.savetxt(filename_z, source_z)
        numpy.savetxt(filename_p, source_p)
    os.chdir('..')
    return misfit
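# --- Illustrative note (not part of the original script) ---
# The misfit accumulated above is 0.5 * sum((s_WT - d_WT)**2) over the
# windowed samples, i.e. an L2 misfit in the wavelet domain. The receiver
# prefix built by the if/elif chain is just the index zero-filled to four
# digits; a quick standalone check:
for irec in (0, 41, 136):
    print('AA.S' + str(irec + 1).zfill(4))   # AA.S0001, AA.S0042, AA.S0137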
def create_prob_functions(DWT, kMeans):
    # creating the 5 sets of EEG signals
    # eyes open
    rootdir = "C:/Users/Nathan Joseph/Desktop/CPEG498/SortedData/eyes open"
    setA = createSets(rootdir)
    # eyes closed
    rootdir = "C:/Users/Nathan Joseph/Desktop/CPEG498/SortedData/eyes closed"
    setB = createSets(rootdir)
    # same side as seizure
    rootdir = "C:/Users/Nathan Joseph/Desktop/CPEG498/SortedData/epileptic hemisphere"
    setC = createSets(rootdir)
    # opposite side of seizure
    rootdir = "C:/Users/Nathan Joseph/Desktop/CPEG498/SortedData/opposite hemisphere"
    setD = createSets(rootdir)
    # actual seizure
    rootdir = "C:/Users/Nathan Joseph/Desktop/CPEG498/SortedData/Seizure"
    setE = createSets(rootdir)

    # dividing each subband into 17 parts (each representing around 2 seconds of data)
    test_a = divideArray(setA, 17)
    test_b = divideArray(setB, 17)
    test_c = divideArray(setC, 17)
    test_d = divideArray(setD, 17)
    test_e = divideArray(setE, 17)

    # discrete wavelet analysis: coefficients for each set.
    # Each set contains 1700 arrays, one per text-file segment; each of those
    # holds 3 arrays (the approximate coefficients and the two detail
    # coefficients), which in turn hold the actual decimal coefficient values.
    testA_coeffs = DWT.getCoeffecients(test_a)
    testB_coeffs = DWT.getCoeffecients(test_b)
    testC_coeffs = DWT.getCoeffecients(test_c)
    testD_coeffs = DWT.getCoeffecients(test_d)
    testE_coeffs = DWT.getCoeffecients(test_e)

    # Each set's 3 coefficient arrays are passed to k-means clustering:
    # three clustering models, one for the approximate coefficients and two
    # for the detail coefficients, from which the probability distributions
    # are computed.
    approximate_coeff = []
    d2coef = []
    d1coef = []

    # walking through the arrays to fill each list, segment by segment,
    # in the set order A, B, C, D, E
    all_coeffs = (testA_coeffs, testB_coeffs, testC_coeffs, testD_coeffs,
                  testE_coeffs)
    for i in range(0, 1700):
        for coeffs in all_coeffs:
            approximate_coeff.extend(coeffs[i][0])   # approximate
            d2coef.extend(coeffs[i][1])              # d2
            d1coef.extend(coeffs[i][2])              # d1

    # initializing the output array
    outputSet = [0] * 6800 + [1] * 1700

    # running the probability distributions of each level
    A2distribution = kMeans.runModel(approximate_coeff, 62)
    D2distribution = kMeans.runModel(d2coef, 62)
    D1distribution = kMeans.runModel(d1coef, 122)

    # creating one array with all inputs for the MLPNN.
    # Pre-processing: classes A-D (non-seizure) are given an output of 0,
    # class E (seizure) is given an output of 1.
    dataset = pd.DataFrame(np.zeros((8500, 18)))
    for subband in range(len(A2distribution)):
        dataset.loc[subband] = (A2distribution[subband] + D2distribution[subband]
                                + D1distribution[subband])
    dataset.columns = [
        "A2k1", "A2k2", "A2k3", "A2k4", "A2k5", "A2k6",
        "D2k1", "D2k2", "D2k3", "D2k4", "D2k5", "D2k6",
        "D1k1", "D1k2", "D1k3", "D1k4", "D1k5", "D1k6",
    ]
    dataset['Output'] = outputSet

    # randomizing data
    dataset = dataset.sample(frac=1).reset_index(drop=True)
    return dataset
def compute_wavelets(station_file, lats, lons, radius, direction, dataset, \
    wavelet, J):
    """
    Compute the MODWT of the GPS time series recorded at all stations
    located within a given radius of the points (lats, lons), and save
    the wavelet decompositions to pickle files.
    """
    # Read station file
    stations = pd.read_csv(station_file, sep=r'\s{1,}', header=None, \
        engine='python')
    stations.columns = ['name', 'longitude', 'latitude']

    # Define subset of stations
    subset = pd.DataFrame(columns=['name', 'longitude', 'latitude'])

    # Loop on latitude and longitude
    a = 6378.136
    e = 0.006694470
    for (lat, lon) in zip(lats, lons):
        # Keep only stations in a given radius
        dx = (pi / 180.0) * a * cos(lat * pi / 180.0) / sqrt(1.0 - e * e * \
            sin(lat * pi / 180.0) * sin(lat * pi / 180.0))
        dy = (3.6 * pi / 648.0) * a * (1.0 - e * e) / ((1.0 - e * e * \
            sin(lat * pi / 180.0) * sin(lat * pi / 180.0)) ** 1.5)
        x = dx * (stations['longitude'] - lon)
        y = dy * (stations['latitude'] - lat)
        stations['distance'] = np.sqrt(np.power(x, 2.0) + np.power(y, 2.0))
        mask = stations['distance'] <= radius
        sub_stations = stations.loc[mask]
        subset = pd.concat([subset, sub_stations], ignore_index=True)
    subset.drop(columns=['distance'], inplace=True)
    subset.drop_duplicates(ignore_index=True, inplace=True)

    # Wavelet initialization
    g = get_scaling(wavelet)
    L = len(g)
    (nuH, nuG) = DWT.get_nu(wavelet, J)

    # Read GPS data and compute wavelet transform
    for station in subset['name']:
        filename = '../data/PANGA/' + dataset + '/' + station + '.' + direction
        # Load the data
        data = np.loadtxt(filename, skiprows=26)
        time = data[:, 0]
        disp = data[:, 1]
        error = data[:, 2]
        sigma = np.std(disp)
        # Correct for the repeated values
        dt = np.diff(time)
        gap = np.where(dt < 1.0 / 365.0 - 0.0001)[0]
        for i in range(0, len(gap)):
            if (gap[i] + 2 < len(time)):
                if ((time[gap[i] + 2] - time[gap[i] + 1] > 2.0 / 365.0 - 0.0001) \
                and (time[gap[i] + 2] - time[gap[i] + 1] < 2.0 / 365.0 + 0.0001)):
                    time[gap[i] + 1] = 0.5 * (time[gap[i] + 2] + time[gap[i]])
            elif (gap[i] + 3 < len(time)):
                if ((time[gap[i] + 2] - time[gap[i] + 1] > 1.0 / 365.0 - 0.0001) \
                and (time[gap[i] + 2] - time[gap[i] + 1] < 1.0 / 365.0 + 0.0001) \
                and (time[gap[i] + 3] - time[gap[i] + 2] > 2.0 / 365.0 - 0.0001) \
                and (time[gap[i] + 3] - time[gap[i] + 2] < 2.0 / 365.0 + 0.0001)):
                    time[gap[i] + 1] = time[gap[i] + 2]
                    time[gap[i] + 2] = 0.5 * (time[gap[i] + 2] + time[gap[i] + 3])
        # Look for gaps greater than 1 day
        days = 2
        dt = np.diff(time)
        gap = np.where(dt > days / 365.0 - 0.0001)[0]
        duration = np.round((time[gap + 1] - time[gap]) * 365).astype(int)
        # Fill the gaps by interpolation
        for j in range(0, len(gap)):
            time = np.insert(time, gap[j] + 1, \
                time[gap[j]] + np.arange(1, duration[j]) / 365.0)
            disp = np.insert(disp, gap[j] + 1, \
                np.random.normal(0.0, sigma, duration[j] - 1))
            gap[j + 1:] = gap[j + 1:] + duration[j] - 1
        # MODWT
        (W, V) = pyramid(disp, wavelet, J)
        (D, S) = get_DS(disp, W, wavelet, J)
        # Save wavelets into file
        filename = 'tmp/' + dataset + '_' + station + '_' + direction + '.pkl'
        pickle.dump([time, disp, W, V, D, S], open(filename, 'wb'))
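# --- Illustrative toy example (synthetic values, not project data) ---
# The gap-filling step above on a miniature series: a 3-day hole between
# day 4 and day 8 is detected from the time differences and filled with
# white noise scaled to the standard deviation of the series.
import numpy as np

time = np.array([1.0, 2.0, 3.0, 4.0, 8.0, 9.0]) / 365.0
disp = np.array([0.1, 0.2, 0.1, 0.3, 0.2, 0.1])
sigma = np.std(disp)

dt = np.diff(time)
gap = np.where(dt > 2.0 / 365.0 - 0.0001)[0]
duration = np.round((time[gap + 1] - time[gap]) * 365).astype(int)

for j in range(0, len(gap)):
    time = np.insert(time, gap[j] + 1,
        time[gap[j]] + np.arange(1, duration[j]) / 365.0)
    disp = np.insert(disp, gap[j] + 1,
        np.random.normal(0.0, sigma, duration[j] - 1))
    gap[j + 1:] = gap[j + 1:] + duration[j] - 1

print(np.round(time * 365))   # days 1..9, with 5, 6, 7 filled in
print(len(disp))              # 9 samples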
def reduce(_H_):
    # Keep only the high-pass (detail) bands of a one-level analysis
    _, H = DWT.analyze(_H_)
    return H
def interpolate(H):
    # Upsample the detail bands by synthesizing with a zeroed low-pass band
    LL = np.zeros(shape=(H[0].shape), dtype=np.float64)
    _H_ = DWT.synthesize(LL, H)
    return _H_
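# --- Illustrative round trip (assumptions: DWT.analyze returns
# (L, (LH, HL, HH)) for one 2D level and DWT.synthesize inverts it; the
# PyWavelets calls below merely stand in for that interface) ---
import numpy as np
import pywt

def analyze(x):
    return pywt.dwt2(x, 'haar')

def synthesize(L, H):
    return pywt.idwt2((L, H), 'haar')

img = np.random.rand(8, 8)
L, H = analyze(img)

# reduce() keeps one side of the decomposition; interpolate() upsamples it
# back by zeroing the other side, exactly as in the helpers above.
zeros = tuple(np.zeros_like(h) for h in H)
lowpass_only = synthesize(L, zeros)
print(lowpass_only.shape)   # (8, 8): original size, details discarded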