def getonefeatures(data):
    # NOTE: `fs` (sampling rate) is read from module scope in the original script.
    listdata = list(data)
    # Std
    fstd = np.array(list(map(lambda x: np.std(x), listdata)))
    # Approximate Entropy (ApEn)
    m = 3
    r = 0.2 * fstd  # per-channel tolerance: 0.2 * std
    fae = np.array(
        list(map(lambda x, y: pyeeg.ap_entropy(x, m, y), listdata, list(r))))
    # Power
    # fpower, fre = pyeeg.bin_power(data, [1, 30], fs)
    # print(fpower)
    # print("features--{std:%.4f,AE:%.4f,Power:%.4f}" % (fstd, fae, fpower))
    # First-order Diff (left unused)
    # firstoderlist = pyeeg.first_order_diff(data)
    # Hjorth: pyeeg.hjorth returns (mobility, complexity) per channel
    fhjormod_com = np.array(list(map(lambda x: pyeeg.hjorth(x), listdata)))
    fhjor_act = np.array(list(map(lambda x: np.var(x), listdata)))  # Hjorth activity = variance
    # Spectrum Entropy
    # fse = pyeeg.spectral_entropy(data, [0, fs / 2], fs)
    # print(fse)
    # Power Spectral Density: keep the peak of the PSD per channel.
    # Fs is passed by keyword; the original positional call would bind fs to NFFT.
    fpsd = np.array(list(map(lambda x: np.max(plt.psd(x, Fs=fs)[0]), listdata)))
    # Features Stack: one row per channel, six features per row
    featurestmp = np.stack(
        (fstd, fae, fhjormod_com[:, 0], fhjormod_com[:, 1], fhjor_act, fpsd),
        axis=1)
    temprow, tmpcol = featurestmp.shape
    features = np.reshape(featurestmp, (temprow * tmpcol, ))
    return features
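# A minimal usage sketch for getonefeatures (an assumption, not from the
# original): `fs` is the module-level sampling rate the function reads, and
# the input is an (n_channels, n_samples) array; the shapes and values here
# are illustrative.
import numpy as np
import matplotlib.pyplot as plt
import pyeeg

fs = 256  # assumed sampling rate in Hz; the original leaves fs as a global

data = np.random.randn(4, fs * 10)  # 4 channels x 10 s of synthetic data
features = getonefeatures(data)
print(features.shape)                # 4 channels * 6 features -> (24,)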
def extract_features(couple_data):
    pca1 = pca_project_data(couple_data, 1)  # take 1st PCA dimension
    pca1_mean = np.mean(pca1, axis=0)
    pca1_std = np.std(pca1, axis=0)      # used below; was commented out
    pca1_med = np.median(pca1, axis=0)   # used below; was commented out
    features = []                        # used below; was commented out

    def sinuosity_deviation_features(seq, mean, std):
        sinuosity_dict = {"A": 0, "B": 0, "C": 0}
        deviation_dict = {"I": 0, "II": 0, "III": 0}
        sinuosity_deviation_dict = {"A-I": 0, "A-II": 0, "A-III": 0,
                                    "B-I": 0, "B-II": 0, "B-III": 0,
                                    "C-I": 0, "C-II": 0, "C-III": 0}
        n = len(seq)
        for i in range(1, n - 1):
            current_af = seq[i]
            prev_af = seq[i - 1]
            next_af = seq[i + 1]
            # (next - current) + (current - prev) telescopes to next - prev
            sinu = abs((next_af - current_af) + (current_af - prev_af))
            if sinu == 0:
                label1 = "A"
            elif 0 < sinu <= 1:
                label1 = "B"
            else:
                label1 = "C"
            sinuosity_dict[label1] += 1
            devi = abs(current_af - mean)
            close = std / 2
            if devi <= close:
                label2 = "I"
            elif devi <= std:
                label2 = "II"
            else:  # devi > std
                label2 = "III"
            deviation_dict[label2] += 1
            sinuosity_deviation_dict["%s-%s" % (label1, label2)] += 1
        # list() so the result survives Python 3 dict views
        return list(sinuosity_deviation_dict.values())

    n = len(pca1)
    pca1_sinuosity_deviation = sinuosity_deviation_features(
        pca1, pca1_mean, pca1_std)
    features += list(np.array(pca1_sinuosity_deviation) / float(n - 2))

    seq = pca1
    dfa = eg.dfa(seq)
    pfd = eg.pfd(seq)
    apen = eg.ap_entropy(seq, 1, np.std(seq) * .2)
    svden = eg.svd_entropy(seq, 2, 2)
    features += [pca1_mean, pca1_med, pca1_std, dfa, pfd, apen, svden]
    return features
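# pca_project_data is a helper from the surrounding codebase and is not shown
# here. A hypothetical stand-in (the sklearn-based body is an assumption, not
# the original implementation): project an (n_samples, n_dims) array onto its
# first k principal components, returning a 1-D sequence when k == 1.
import numpy as np
from sklearn.decomposition import PCA
import pyeeg as eg  # the snippet aliases its feature library as `eg`

def pca_project_data(data, k):
    proj = PCA(n_components=k).fit_transform(np.asarray(data))
    return proj[:, 0] if k == 1 else proj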
MAX_EPOCH_N = 256 * 30
EPOCH_STEP = 256 * 5
N_REPLICATES = 5
SPECT_ENT_BANDS = 2 ** np.arange(0, 8) / 2

fun_to_test = [
    {"times": 100, "name": "hfd", "is_original": True, "fun": lambda x: pyeeg.hfd(x, 2 ** 3)},
    {"times": 100, "name": "hfd", "is_original": False, "fun": lambda x: univ.hfd(x, 2 ** 3)},
    {"times": 100, "name": "hjorth", "is_original": True, "fun": lambda x: pyeeg.hjorth(x)},
    {"times": 100, "name": "hjorth", "is_original": False, "fun": lambda x: univ.hjorth(x)},
    {"times": 100, "name": "pfd", "is_original": True, "fun": lambda x: pyeeg.pfd(x)},
    # was pyeeg.pfd; the non-original entry should exercise univ
    {"times": 100, "name": "pfd", "is_original": False, "fun": lambda x: univ.pfd(x)},
    {"times": 2, "name": "samp_ent", "is_original": True, "fun": lambda x: pyeeg.samp_entropy(x, 2, 1.5)},
    {"times": 10, "name": "samp_ent", "is_original": False, "fun": lambda x: univ.samp_entropy(x, 2, 1.5, relative_r=False)},
    {"times": 2, "name": "ap_ent", "is_original": True, "fun": lambda x: pyeeg.ap_entropy(x, 2, 1.5)},
    {"times": 10, "name": "ap_ent", "is_original": False, "fun": lambda x: univ.ap_entropy(x, 2, 1.5)},
    {"times": 10, "name": "svd_ent", "is_original": True, "fun": lambda x: pyeeg.svd_entropy(x, 2, 3)},
    {"times": 100, "name": "svd_ent", "is_original": False, "fun": lambda x: univ.svd_entropy(x, 2, 3)},
    {"times": 10, "name": "fisher_info", "is_original": True, "fun": lambda x: pyeeg.fisher_info(x, 2, 3)},
    {"times": 100, "name": "fisher_info", "is_original": False, "fun": lambda x: univ.fisher_info(x, 2, 3)},
    {"times": 100, "name": "spectral_entropy", "is_original": True, "fun": lambda x: pyeeg.spectral_entropy(x, SPECT_ENT_BANDS, 256)},
    {"times": 100, "name": "spectral_entropy", "is_original": False, "fun": lambda x: univ.spectral_entropy(x, 256, SPECT_ENT_BANDS)},
]

# NOTE: MIN_EPOCH_N is used below but defined elsewhere in the original script.
def make_one_rep():
    ldfs = []
    for n in range(MIN_EPOCH_N, MAX_EPOCH_N + 1, EPOCH_STEP):
        a = np.random.normal(size=n)
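# make_one_rep is cut off above. A minimal sketch of how the fun_to_test
# entries could be timed (the timeit loop and printed fields are assumptions,
# not the original benchmark):
import timeit
import numpy as np

def time_entry(entry, signal):
    # run entry["fun"] entry["times"] times; return mean seconds per call
    total = timeit.timeit(lambda: entry["fun"](signal), number=entry["times"])
    return total / entry["times"]

signal = np.random.normal(size=EPOCH_STEP)  # one 5 s epoch at 256 Hz
for entry in fun_to_test:
    print(entry["name"], entry["is_original"], time_entry(entry, signal))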
def CreateFeatureVector(collection, dbName, takeFirstMinutes):
    limitSamples = SamplingRates[dbName] * 60 * takeFirstMinutes
    lastBeat = -1
    lastQ = -1
    amplitudeSum = {Constants.LabelQ: 0, Constants.LabelR: 0, Constants.LabelS: 0}
    labelsCounters = {Constants.LabelQ: 0, Constants.LabelR: 0, Constants.LabelS: 0}
    amplitudesLists = {Constants.LabelQ: list(), Constants.LabelR: list(), Constants.LabelS: list()}
    heartbeats = list()
    valuesHistogram = dict()
    sumQS = 0
    countQS = 0

    for entry in collection.find().sort(Constants.Time, ASCENDING).limit(limitSamples):
        label = entry[Constants.Label]
        time = entry[Constants.Time]
        value = entry[Constants.Value]
        if label == Constants.LabelNone:
            continue
        amplitudeSum[label] += value
        labelsCounters[label] += 1
        amplitudesLists[label].append(value)
        if value in valuesHistogram:
            valuesHistogram[value] += 1
        else:
            valuesHistogram[value] = 1
        if label == Constants.LabelR:
            if lastBeat > 0:
                heartbeats.append(time - lastBeat)  # R-R interval in samples
            lastBeat = time
        elif label == Constants.LabelQ:
            lastQ = time
        elif label == Constants.LabelS:
            if lastQ > 0:
                sumQS += time - lastQ  # Q-S duration in samples
                countQS += 1
            lastQ = -1

    averageQAmplitude = amplitudeSum[Constants.LabelQ] / float(labelsCounters[Constants.LabelQ])
    averageRAmplitude = amplitudeSum[Constants.LabelR] / float(labelsCounters[Constants.LabelR])
    averageSAmplitude = amplitudeSum[Constants.LabelS] / float(labelsCounters[Constants.LabelS])

    # Calculate the baseline as the most common sample value
    # (.items(): the original .iteritems() is Python 2 only)
    baseline = max(valuesHistogram.items(), key=operator.itemgetter(1))[0]

    # Convert heartbeat lengths from a number of samples to seconds
    normalizedHeartbeats = [
        float(i) / SamplingRates[dbName] for i in heartbeats
    ]
    heartbeatSTD = std(normalizedHeartbeats)
    beatChanges = [
        abs(float(x) - normalizedHeartbeats[i - 1]) / normalizedHeartbeats[i - 1]
        for i, x in enumerate(normalizedHeartbeats)
    ][1:]
    beatChangesCount = len(
        [change for change in beatChanges if float(change) >= 0.1])
    totalBeatsCount = len(beatChanges)

    result = FeatureVector(
        Database=dbName,
        RecordNumber=collection.name,
        AverageHeartbeat=60 / mean(normalizedHeartbeats),
        IrregularBeatsPercent=float(beatChangesCount) / totalBeatsCount,
        AverageBeatChange=mean(beatChanges),
        Irregularity=ap_entropy(normalizedHeartbeats, 2, heartbeatSTD * 0.2),
        QS=(sumQS / float(countQS)) * 1000 / SamplingRates[dbName],  # mean Q-S time in ms
        QtoR=abs(averageQAmplitude - baseline) / abs(averageRAmplitude - baseline),
        StoR=abs(averageSAmplitude - baseline) / abs(averageRAmplitude - baseline),
        QSTD=std(amplitudesLists[Constants.LabelQ]),
        RSTD=std(amplitudesLists[Constants.LabelR]),
        SSTD=std(amplitudesLists[Constants.LabelS]))
    return result
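# A minimal usage sketch (assumptions throughout): a local MongoDB instance,
# and SamplingRates, Constants, and FeatureVector defined as in the
# surrounding project. Connection details and names are illustrative.
import operator
from numpy import mean, std
from pymongo import MongoClient, ASCENDING
from pyeeg import ap_entropy

client = MongoClient("mongodb://localhost:27017")
collection = client["ecg"]["record100"]  # illustrative database/record names
vector = CreateFeatureVector(collection, dbName="mitdb", takeFirstMinutes=5)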
#print(get_all_wavelet[0][0])
temp_extrasi1 = []  # temporary row for one sub-band's features
temp_wavelet1 = []
hasil_extrasi_fitur = []  # collects every feature-extraction result

for x in range(len(get_all_wavelet)):
    for y in range(len(get_all_wavelet[x])):
        temp_extrasi1.append(dc.energi(get_all_wavelet[x][y]))
        # the std in `dc` was wrong, so take numpy's directly here
        temp_extrasi1.append(np.std(get_all_wavelet[x][y]))
        temp_extrasi1.append(dc.maximum(get_all_wavelet[x][y]))
        temp_extrasi1.append(dc.mininum(get_all_wavelet[x][y]))  # name as spelled in the helper
        # approximate entropy: param 1 is the data, param 2 is the segment
        # length to cut, param 3 is the similarity tolerance; params 2 and 3
        # were chosen by trial and error
        temp_extrasi1.append(
            ap_entropy(get_all_wavelet[x][y],
                       len(get_all_wavelet[x][y]) // 5,  # // keeps M an int under Python 3
                       temp_extrasi1[1] * 0.2))
        temp_wavelet1.append(temp_extrasi1)
        temp_extrasi1 = []
    hasil_extrasi_fitur.append(temp_wavelet1)
    temp_wavelet1 = []

# all feature-extraction results live in hasil_extrasi_fitur as 3-D data
print(len(hasil_extrasi_fitur))
if len(hasil_extrasi_fitur) > 0:
    print(len(hasil_extrasi_fitur[0]))
    print(hasil_extrasi_fitur[0][0][0])
    print(hasil_extrasi_fitur[0][0][1])
    print(hasil_extrasi_fitur[0][0][2])
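# The `dc` module above is a project-local helper that is not shown. A
# hypothetical stand-in, assuming `energi` is the sub-band energy (sum of
# squared coefficients) and `maximum`/`mininum` are plain extrema:
import numpy as np

class dc:  # stand-in for the project's helper module
    @staticmethod
    def energi(coeffs):
        return float(np.sum(np.square(coeffs)))  # wavelet sub-band energy

    @staticmethod
    def maximum(coeffs):
        return float(np.max(coeffs))

    @staticmethod
    def mininum(coeffs):  # name kept as spelled in the original script
        return float(np.min(coeffs))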
        'tol': table[idx, 2],
        'score': table[idx, 0]
    })
    print(' Channel %i: optimal pair is' % (i), table[idx, 1], 'min_l',
          table[idx, 2], 'tol --', 'auc roc is', table[idx, 0])

print('Tuning approximate entropy ...')
subwindow_sizes = np.arange(1, train_banded.shape[1])
opt_sw_tol_apent = []
for i in range(train_banded.shape[2]):
    table = []
    for subwindow in subwindow_sizes:
        for tol in tolerance_values:
            entropy_list = []
            for j in range(train_banded.shape[0]):
                entropy_list.append(
                    pyeeg.ap_entropy(train_banded[j, :, i], subwindow, tol))
            entropy_list = np.array(entropy_list)
            # direction-agnostic AUC: take the better of the scores for y and 1-y
            score = np.max([
                roc_auc_score(y_true=train_y, y_score=entropy_list),
                roc_auc_score(y_true=1 - train_y, y_score=entropy_list)
            ])
            table.append([score, subwindow, tol])
    table = np.array(table)
    idx = np.argmax(table[:, 0])
    opt_sw_tol_apent.append({
        'channel': i,
        'subwindow': table[idx, 1],
        'tol': table[idx, 2],
        'score': table[idx, 0]
    })
    print(' Channel %i: optimal pair is' % (i), table[idx, 1],
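# The fragment above assumes names defined earlier in the script. A minimal
# setup sketch for running the tuning block standalone; every shape and value
# here is an illustrative assumption:
import numpy as np
import pyeeg
from sklearn.metrics import roc_auc_score

train_banded = np.random.randn(20, 64, 3)    # (n_windows, window_len, n_channels)
train_y = np.random.randint(0, 2, size=20)   # binary label per window
tolerance_values = np.linspace(0.1, 1.0, 5)  # candidate ap_entropy tolerances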
"times": 2, "name": "samp_ent", "is_original": True, "fun": lambda x: pyeeg.samp_entropy(x, 2, 1.5) }, { "times": 10, "name": "samp_ent", "is_original": False, "fun": lambda x: univ.samp_entropy(x, 2, 1.5, relative_r=False) }, { "times": 2, "name": "ap_ent", "is_original": True, "fun": lambda x: pyeeg.ap_entropy(x, 2, 1.5) }, { "times": 10, "name": "ap_ent", "is_original": False, "fun": lambda x: univ.ap_entropy(x, 2, 1.5) }, { "times": 10, "name": "svd_ent", "is_original": True, "fun": lambda x: pyeeg.svd_entropy(x, 2, 3) }, { "times": 100,