def test_pos():
    """Check that per-position evaluation matches whole-signal evaluation.

    Each feature is computed once over the complete random signal and once
    position by position (the position index is passed as an extra
    argument). Both results must be exactly equal. The detrended spectral
    entropy is compared against ``spectral_entropy`` called with ``True`` as
    its fourth argument.
    """
    np.random.seed(0)
    signal = np.random.rand(500)
    window = 15
    positions = range(500)

    # Linear least-squares filter.
    lstsq_full = linear_lstsq(signal, window)
    lstsq_pointwise = np.asarray(
        [linear_lstsq(signal, window, pos) for pos in positions])
    np.testing.assert_array_equal(lstsq_full, lstsq_pointwise)

    # Linear least-squares derivative.
    deriv_full = linear_lstsq_deriv(signal, window)
    deriv_pointwise = np.asarray(
        [linear_lstsq_deriv(signal, window, pos) for pos in positions])
    np.testing.assert_array_equal(deriv_full, deriv_pointwise)

    # Finite approximation of the EWMA / EWM variance; the per-position call
    # yields a (mean, variance) pair, hence the column split below.
    mean_full, var_full = finite_approx_ewma_and_ewmvar(signal, window)
    mean_var_pointwise = np.asarray(
        [finite_approx_ewma_and_ewmvar(signal, window, pos)
         for pos in positions])
    mean_pointwise = mean_var_pointwise[:, 0]
    var_pointwise = mean_var_pointwise[:, 1]
    np.testing.assert_array_equal(mean_full, mean_pointwise)
    np.testing.assert_array_equal(var_full, var_pointwise)

    # Spectral entropy.
    entropy_full = spectral_entropy(signal, window)
    entropy_pointwise = np.asarray(
        [spectral_entropy(signal, window, pos) for pos in positions])
    np.testing.assert_array_equal(entropy_full, entropy_pointwise)

    # Detrended spectral entropy.
    detrended_full = spectral_entropy_detrend(
        signal, linear_lstsq(signal, window), window)
    detrended_pointwise = np.asarray(
        [spectral_entropy(signal, window, pos, True) for pos in positions])
    np.testing.assert_array_equal(detrended_full, detrended_pointwise)

    print('All tests passed')
def _drop_nan_frames(features):
    """Return ``features`` without the columns (frames) containing any NaN."""
    return features[:, ~np.isnan(features).any(axis=0)]


def reload_and_feature(picall, feature_type, average, nmel, order_frft, nmfcc,
                       saveprojectpath, savedata, savepic, savetestdata,
                       savepreprocess, savefeature, path, downsample_rate,
                       frame_time, frame_length, frame_overlap, test_rate):
    """Extract the selected features for every preprocessed sample.

    Reads the preprocessing CSV ``savepreprocess`` row by row (column 0 is
    the label index, column 1 the file index, the rest is the time series),
    computes the features selected by ``feature_type`` and writes one CSV
    per sample to ``savefeature/<label name>/<label>_<file index>.csv``.

    Feature indices accepted in ``feature_type`` (results are stacked in the
    order the indices are given):

        0   magnitude of ``fe.stft``
        1   ``fe.zero_crossing_rate``
        2   ``fe.energy``
        3   ``fe.entropy_of_energy``
        4   ``fe.spectral_centroid_spread`` (both outputs)
        5   ``fe.spectral_entropy``
        6   ``fe.spectral_flux``
        7   ``fe.spectral_rolloff`` (ratio 0.85)
        8   ``fe.bandwidth``
        9   ``fe.mfccs``
        10  ``fe.rms``
        11  ``fe.stfrft``
        12  ``fe.frft_MFCC``
        13  ``fe.fundalmental_freq`` (first output only)
        14  ``fe.chroma_stft``
        15  ``fe.log_attack_time``
        16  ``fe.temoporal_centroid``
        17  harmonic spectral centroid/deviation/spread/variation (``timbral``)
        18  ``fe.pitches_mag_CDSV``
        19  first-order ``fe.delta_features`` of the MFCCs
        20  second-order ``fe.delta_features`` of the MFCCs

    Args:
        picall: When truthy, a picture path prefix is passed to the feature
            functions through their ``pic`` argument; otherwise ``None``.
        feature_type: Iterable of feature indices (see the table above).
        average: When truthy, each sample is reduced to the per-feature mean
            and variance over its frames and all samples are standardized
            together. Otherwise the per-frame features are standardized and
            written sample by sample.
        nmel: Number of mel bands forwarded to the MFCC-based features.
        order_frft: Sequence indexed by the integer label, giving the
            fractional Fourier order ``p`` used for features 11 and 12.
        nmfcc: Number of MFCC coefficients.
        saveprojectpath, savedata, savetestdata, frame_time, test_rate:
            Unused by this function; kept for interface compatibility.
        savepic: Directory used to build the picture path prefix.
        savepreprocess: Path of the preprocessing CSV to read.
        savefeature: Output directory (created if missing).
        path: Dataset directory; its entries are used as label names.
        downsample_rate: Sampling rate forwarded to the feature functions.
        frame_length: Frame length in samples.
        frame_overlap: Forwarded to ``preprocessing.frame``; the STFT uses
            ``noverlap = frame_length - frame_overlap``.

    Returns:
        None. Results are written to disk.
    """
    # Names of the entries in the dataset directory; indexed by label below.
    labelname = os.listdir(path)
    if not os.path.exists(savefeature):
        os.makedirs(savefeature)
    for label_name in labelname:
        label_dir = os.path.join(savefeature, label_name)
        if not os.path.exists(label_dir):
            os.mkdir(label_dir)

    # With ``average`` set, per-sample statistics are buffered here so that
    # all samples can be standardized together once the file is read.
    feature_set = []

    with open(savepreprocess, encoding='utf-8') as datafile:
        for row in csv.reader(datafile):  # every element of row is a string
            label, file_index = row[0], row[1]
            time_series = np.array(row[2:]).astype('float32')

            frames = preprocessing.frame(time_series, frame_length,
                                         frame_overlap)
            f, _, stft = fe.stft(time_series,
                                 pic=None,
                                 fs=downsample_rate,
                                 nperseg=frame_length,
                                 noverlap=frame_length - frame_overlap,
                                 nfft=8192,
                                 boundary=None,
                                 padded=False)

            # Picture path prefix handed to the feature functions.
            if picall:
                pic = os.path.join(savepic, label + '_' + file_index)
            else:
                pic = None

            # One entry per requested feature; each entry holds one column
            # per frame.
            feature_list = []
            # MFCCs are shared by features 9, 19 and 20. They are computed
            # on first use so that the delta features no longer depend on
            # feature 9 having been requested earlier in ``feature_type``.
            mfcc = None
            for i in feature_type:
                if i in (9, 19, 20) and mfcc is None:
                    mfcc = fe.mfccs(X=stft,
                                    fs=downsample_rate,
                                    nfft=8192,
                                    n_mels=nmel,
                                    n_mfcc=nmfcc,
                                    pic=pic if 9 in feature_type else None)

                if i == 0:
                    feature_list.append(np.abs(stft))
                elif i == 1:
                    feature_list.append(
                        fe.zero_crossing_rate(frames, pic=pic))
                elif i == 2:
                    feature_list.append(fe.energy(frames, pic=pic))
                elif i == 3:
                    feature_list.append(
                        fe.entropy_of_energy(frames, pic=pic))
                elif i == 4:
                    centroid, spread = fe.spectral_centroid_spread(
                        stft, downsample_rate, pic=pic)
                    feature_list.append(centroid)
                    feature_list.append(spread)
                elif i == 5:
                    feature_list.append(fe.spectral_entropy(stft, pic=pic))
                elif i == 6:
                    feature_list.append(fe.spectral_flux(stft, pic=pic))
                elif i == 7:
                    feature_list.append(
                        fe.spectral_rolloff(stft, 0.85, downsample_rate,
                                            pic=pic))
                elif i == 8:
                    feature_list.append(fe.bandwidth(stft, f, pic=pic))
                elif i == 9:
                    feature_list.append(mfcc)
                elif i == 10:
                    feature_list.append(fe.rms(stft, pic=pic))
                elif i == 11:
                    feature_list.append(
                        fe.stfrft(frames, p=order_frft[int(label)], pic=pic))
                elif i == 12:
                    frft = fe.stfrft(frames, p=order_frft[int(label)])
                    feature_list.append(
                        fe.frft_MFCC(S=frft,
                                     fs=downsample_rate,
                                     n_mfcc=nmfcc,
                                     n_mels=nmel,
                                     pic=pic))
                elif i == 13:
                    fundamental, _ = fe.fundalmental_freq(
                        frames=frames, fs=downsample_rate, pic=pic)
                    feature_list.append(fundamental)
                elif i == 14:
                    feature_list.append(
                        fe.chroma_stft(S=stft,
                                       n_chroma=12,
                                       A440=440.0,
                                       ctroct=5.0,
                                       octwidth=2,
                                       base_c=True,
                                       norm=2))
                elif i == 15:
                    feature_list.append(
                        fe.log_attack_time(x=time_series,
                                           lower_ratio=0.02,
                                           upper_ratio=0.99,
                                           fs=downsample_rate,
                                           n=frames.shape[1]))
                elif i == 16:
                    feature_list.append(
                        fe.temoporal_centroid(S=stft,
                                              hop_length=frame_overlap,
                                              fs=downsample_rate))
                elif i == 17:
                    harm_freq, harm_mag = timbral.harmonics(
                        frames=frames,
                        fs=downsample_rate,
                        S=stft,
                        f=f,
                        nfft=8192,
                        fmin=50,
                        fmax=500,
                        nht=0.15)
                    hsc = timbral.harmonic_spectral_centroid(
                        harm_freq, harm_mag)
                    hsd = timbral.harmonic_spectral_deviation(harm_mag)
                    hss = timbral.harmonic_spectral_spread(
                        hsc, harm_freq, harm_mag)
                    hsv = timbral.harmonic_spectral_variation(harm_mag)
                    feature_list.append(
                        np.concatenate([hsc, hsd, hss, hsv], axis=0))
                elif i == 18:
                    feature_list.append(
                        fe.pitches_mag_CDSV(f=f,
                                            S=stft,
                                            fs=downsample_rate,
                                            fmin=50,
                                            fmax=downsample_rate / 2,
                                            threshold=0.2))
                elif i == 19:
                    feature_list.append(fe.delta_features(mfcc, order=1))
                elif i == 20:
                    feature_list.append(fe.delta_features(mfcc, order=2))

            # Stack all feature types: rows are features, columns are frames.
            features = np.concatenate(feature_list, axis=0)
            features = _drop_nan_frames(features)

            if average:
                # Replace the per-frame features by their mean and variance
                # over the frames (row vectors), followed by the label and
                # the file index.
                mean = np.mean(features, axis=1).reshape(1, features.shape[0])
                var = np.var(features, axis=1).reshape(1, features.shape[0])
                features = np.concatenate([
                    mean, var,
                    np.array([int(label), int(file_index)]).reshape(1, 2)
                ], axis=1)
                feature_set.append(features)
            else:
                # NOTE(review): features is (n_features, n_frames) here, so
                # StandardScaler standardizes each frame across features,
                # whereas the averaged branch standardizes each feature
                # across samples - confirm this is intended.
                scale = StandardScaler().fit(features)
                features = scale.transform(features)
                csv_path = os.path.join(savefeature, labelname[int(label)],
                                        label + '_' + file_index + '.csv')
                with open(csv_path, 'w', encoding='utf-8',
                          newline='') as csvfile:
                    csv_writer = csv.writer(csvfile)
                    # One CSV row per frame: features, label, file index.
                    buffer = np.concatenate([
                        features.T,
                        int(label) * np.ones((features.shape[1], 1)),
                        int(file_index) * np.ones((features.shape[1], 1))
                    ], axis=1)
                    csv_writer.writerows(buffer)
            print('featuring:', row[0], row[1])

    # Nothing to standardize or write when the preprocessing file was empty.
    if average and feature_set:
        # Rows are samples, columns are features plus (label, file index).
        features = np.concatenate(feature_set, axis=0)
        # Split the label columns off so they are not standardized.
        labels = features[:, -2:]
        features = features[:, 0:-2]
        scale = StandardScaler().fit(features)
        features = scale.transform(features)
        features = np.concatenate([features, labels], axis=1)
        for sample in features:
            label_index = int(sample[-2])
            file_number = int(sample[-1])
            csv_path = os.path.join(
                savefeature, labelname[label_index],
                str(label_index) + '_' + str(file_number) + '.csv')
            with open(csv_path, 'w', encoding='utf-8', newline='') as csvfile:
                # Each audio file has a single feature vector, stored as one
                # row of its own CSV file.
                csv.writer(csvfile).writerow(sample)
def test_features():
    """Plot least-squares-based spectral-entropy features of a test signal.

    Builds a synthetic signal made of two concatenated sine segments,
    computes ``linear_lstsq``, ``spectral_entropy`` and
    ``spectral_entropy_detrend`` for window sizes 5, 20 and 60, and shows
    three figures: the input, the spectral entropy and the detrended
    spectral entropy. The signal is generated in memory, so no input file is
    required.
    """
    # Fixed seed: keeps the run reproducible if random noise is added to the
    # signal.
    np.random.seed(0)

    signal = np.concatenate([
        np.sin(np.arange(0, (10 * np.pi), (2 * np.pi / 100))),
        np.sin(np.arange(0, (4 * np.pi), (2 * np.pi / 500))),
    ])
    raw_data = signal

    windows = (5, 20, 60)
    legend = ['n = 5', 'n = 20', 'n = 60']

    trends = [linear_lstsq(raw_data, n) for n in windows]
    entropies = [spectral_entropy(raw_data, n) for n in windows]
    # The detrended entropy takes the matching least-squares trend as input.
    detrended_entropies = [
        spectral_entropy_detrend(raw_data, trend, n)
        for trend, n in zip(trends, windows)
    ]

    plt.figure(3)
    plt.plot(raw_data)
    plt.title('Combined Data')
    plt.xlabel('Time (min)')

    plt.figure(23)
    for entropy in entropies:
        plt.plot(entropy)
    plt.title('Spectral Entropy')
    plt.xlabel('Time (min)')
    plt.legend(legend)
    plt.ylim(0)

    plt.figure(24)
    for entropy in detrended_entropies:
        plt.plot(entropy)
    plt.title('Spectral Entropy (Detrended)')
    plt.xlabel('Time (min)')
    plt.legend(legend)
    plt.ylim(0)

    plt.show()
def spectral_entropy_time(data):
    """Return ``feature_extraction.spectral_entropy`` of ``data`` itself.

    The second argument of the underlying call (the number of short blocks)
    is fixed to 1.
    """
    return feature_extraction.spectral_entropy(data, 1)
def spectral_entropy_freq(data):
    """Return ``feature_extraction.spectral_entropy`` of ``abs(fft(data))``.

    The second argument of the underlying call (the number of short blocks)
    is fixed to 1.
    """
    magnitude_spectrum = abs(fft(data))
    return feature_extraction.spectral_entropy(magnitude_spectrum, 1)
frames = preprocessing.frame(data, frame_length, frame_lap) f, t, stft = fe.stft(data, pic=None, fs=fs, nperseg=frame_length, noverlap=frame_length - frame_lap, nfft=8192, boundary=None, padded=False) pic = None feature1 = fe.zero_crossing_rate(frames, pic=pic) feature2 = fe.energy(frames, pic=pic) feature3 = fe.entropy_of_energy(frames, pic=pic) feature4, feature41 = fe.spectral_centroid_spread(stft, fs, pic=pic) feature5 = fe.spectral_entropy(stft, pic=pic) feature6 = fe.spectral_flux(stft, pic=pic) feature7 = fe.spectral_rolloff(stft, 0.85, fs, pic=pic) feature8 = fe.bandwidth(stft, f, pic=pic) feature9 = fe.mfccs(X=stft, fs=fs, nfft=8192, n_mels=128, n_mfcc=13, pic=pic) feature10 = fe.rms(stft, pic=pic) feature11 = fe.stfrft(frames, p=0.95, pic=pic) tmp = fe.stfrft(frames, p=0.95) feature12 = fe.frft_MFCC(S=tmp, fs=fs, n_mfcc=13, n_mels=128, pic=pic) feature19 = fe.delta_features(feature9, order=1) feature20 = fe.delta_features(feature9, order=2) plt.figure() ax1 = plt.subplot(411) plt.plot(data) ax1.set_ylabel('original signal')