Ejemplo n.º 1
0
          'nccf_thresh2': 0.9,
          'nccf_maaxcands': 3,
          'nccf_pwidth': 5,       # 5
          'merit_boost': 5,
          'merit_pivot': 0.20,
          'merit_extra': 0.4,
          'median_value': 7,
          'dp_w1': 0.15,
          'dp_w2': 0.5,
          'dp_w3': 100,
          'dp_w4': 0.9
          }

pitch = pYAAPT.yaapt(signal, **params)
frames = preprocessing.frame(silence_remove, frame_length, frame_overlap)
f, t, stft = fea.stft(silence_remove, pic=None, fs=sample_rate, nperseg=frame_length,
         noverlap=frame_overlap, nfft=8192, padded=True, boundary=None)
f,t,stft = scipy.signal.stft(x=silence_remove, fs=sample_rate, window='hann', nperseg=frame_length, noverlap=frame_overlap,
                  nfft=8192, detrend=False, return_onesided=True, boundary='zeros', padded=True, axis=-1)
print(pitch.samp_values.shape[0], frames.shape[1])
for i in range(min(pitch.samp_values.shape[0], frames.shape[1])):
    plt.figure()
    plt.subplot(211)
    X, _ = np.abs(fea.fft_singleside(x=frames[:,i], fs=sample_rate, n=8192, pic=None))
    plt.plot(np.arange(0, 8192/2+1), np.abs(stft[:,i]), 'y')
    plt.axvline(pitch.samp_interp[i], c='b')
    plt.axvline(pitch.samp_values[i], c='g')
    plt.subplot(212)
    plt.plot(np.arange(0, 8192 / 2 + 1), X, 'r')
    plt.axvline(pitch.samp_interp[i], c='b')
    plt.axvline(pitch.samp_values[i], c='g')
    plt.show()
Ejemplo n.º 2
0
savepic = saveprojectpath + '\\pic'
path = '..\\boy_and_girl'  # 数据集路径
downsample_rate = 8000
frame_length = int(0.02 * downsample_rate)  # 20ms
frame_overlap = frame_length // 2

with open(savedata + '\\' + 'feature_result.csv', 'w',
          encoding='utf-8') as csvfile:
    writer = csv.writer(csvfile)

datafile = open(savedata + '\\' + 'preprocessing_result.csv')
csv_reader = csv.reader(datafile)
for row in csv_reader:
    time_series = np.array(row[1:]).astype('float32')
    # stft的每一列一个帧对应的特征
    _, _, stft = fea.stft(time_series,
                          fs=downsample_rate,
                          nperseg=512,
                          noverlap=128,
                          nfft=512)
    stft = np.abs(stft)
    for i in range(stft.shape[1]):
        with open(savedata + '\\' + 'feature_result.csv',
                  'a+',
                  newline='',
                  encoding='utf-8') as csvfile:
            csv_write = csv.writer(csvfile)
            buffer = list(stft[:, i]) + [row[0]]  # 把特征对应的标签加进来
            csv_write.writerow(buffer)
            print(row[0], i)
Ejemplo n.º 3
0
def reload_and_feature(picall, feature_type, average, nmel, order_frft, nmfcc,
                       saveprojectpath, savedata, savepic, savetestdata,
                       savepreprocess, savefeature, path, downsample_rate,
                       frame_time, frame_length, frame_overlap, test_rate):
    '''
    fe.stft,                           # 0
    fe.zero_crossing_rate,             # 1
    fe.energy,                         # 2
    fe.entropy_of_energy,              # 3
    fe.spectral_centroid_spread,       # 4
    fe.spectral_entropy,               # 5
    fe.spectral_flux,                  # 6
    fe.spectral_rolloff,               # 7
    fe.bandwidth,                      # 8
    fe.mfccs,                          # 9
    fe.rms                             # 10
    fe.stfrft                          # 11
    fe.frft_mfcc                       # 12

    '''
    labelname = os.listdir(path)  # 获取该数据集路径下的子文件名
    if not os.path.exists(savefeature):
        os.mkdir(savefeature)  # 创建保存特征结果的文件
    for i in range(len(labelname)):
        if not os.path.exists(savefeature + '\\' + labelname[i]):
            os.mkdir(savefeature + '\\' + labelname[i])

    datafile = open(savepreprocess, encoding='utf-8')  # 读取预处理结果
    csv_reader = csv.reader(datafile)  # 以这种方式读取文件得到的结果是一个迭代器

    feature_set = []  # 当使用统计量作为特征时,将所有样本的特征缓存入该变量以进行归一化
    for row in csv_reader:  # row中的元素是字符类型
        time_series = np.array(row[2:]).astype(
            'float32')  # row的前两个元素分别是标签和对应文件次序
        #######################################################################
        frames = preprocessing.frame(time_series, frame_length,
                                     frame_overlap)  # 分帧
        f, t, stft = fe.stft(time_series,
                             pic=None,
                             fs=downsample_rate,
                             nperseg=frame_length,
                             noverlap=frame_length - frame_overlap,
                             nfft=8192,
                             boundary=None,
                             padded=False)
        # if stft.shape[1] != frames.shape[1]:                                 # 防止stft的时间个数和帧的个数不一样
        #     dim = min(stft.shape[1], frames.shape[1])
        #     stft = stft[:, 0:dim]
        #     frames = frames[:, 0:dim]
        # Mel = lib.feature.melspectrogram(S=np.abs(stft), sr=downsample_rate, n_fft=2*(stft.shape[0]-1), n_mels=512)
        feature_list = []  # 用于存放各种类型的特征,每个帧对应一个特征向量,其元素分别是每种类型的特征
        if picall:  # 用于绘图控制
            pic = savepic + '\\' + row[0] + '_' + row[1]
        else:
            pic = None

        for i in feature_type:
            if i == 0:
                feature0 = np.abs(stft)
                feature_list.append(feature0)
            elif i == 1:
                feature1 = fe.zero_crossing_rate(frames, pic=pic)
                feature_list.append(feature1)
            elif i == 2:
                feature2 = fe.energy(frames, pic=pic)
                feature_list.append(feature2)
            elif i == 3:
                feature3 = fe.entropy_of_energy(frames, pic=pic)
                feature_list.append(feature3)
            elif i == 4:
                feature4, feature41 = fe.spectral_centroid_spread(
                    stft, downsample_rate, pic=pic)
                feature_list.append(feature4)
                feature_list.append(feature41)
            elif i == 5:
                feature5 = fe.spectral_entropy(stft, pic=pic)
                feature_list.append(feature5)
            elif i == 6:
                feature6 = fe.spectral_flux(stft, pic=pic)
                feature_list.append(feature6)
            elif i == 7:
                feature7 = fe.spectral_rolloff(stft,
                                               0.85,
                                               downsample_rate,
                                               pic=pic)
                feature_list.append(feature7)
            elif i == 8:
                feature8 = fe.bandwidth(stft, f, pic=pic)
                feature_list.append(feature8)
            elif i == 9:
                feature9 = fe.mfccs(
                    X=stft,
                    fs=downsample_rate,
                    # nfft=2*(stft.shape[0]-1),
                    nfft=8192,
                    n_mels=nmel,
                    n_mfcc=nmfcc,
                    pic=pic)
                feature_list.append(feature9)
            elif i == 10:
                feature10 = fe.rms(stft, pic=pic)
                feature_list.append(feature10)
            elif i == 11:
                feature11 = fe.stfrft(frames,
                                      p=order_frft[int(row[0])],
                                      pic=pic)
                feature_list.append(feature11)
            elif i == 12:
                tmp = fe.stfrft(frames, p=order_frft[int(row[0])])
                feature12 = fe.frft_MFCC(S=tmp,
                                         fs=downsample_rate,
                                         n_mfcc=nmfcc,
                                         n_mels=nmel,
                                         pic=pic)
                feature_list.append(feature12)
            elif i == 13:
                feature13, feature13_ = fe.fundalmental_freq(
                    frames=frames, fs=downsample_rate, pic=pic)
                feature_list.append(feature13)
            elif i == 14:
                feature14 = fe.chroma_stft(S=stft,
                                           n_chroma=12,
                                           A440=440.0,
                                           ctroct=5.0,
                                           octwidth=2,
                                           base_c=True,
                                           norm=2)
                feature_list.append(feature14)
            elif i == 15:
                feature15 = fe.log_attack_time(x=time_series,
                                               lower_ratio=0.02,
                                               upper_ratio=0.99,
                                               fs=downsample_rate,
                                               n=frames.shape[1])
                feature_list.append(feature15)
            elif i == 16:
                feature16 = fe.temoporal_centroid(S=stft,
                                                  hop_length=frame_overlap,
                                                  fs=downsample_rate)
                feature_list.append(feature16)
            elif i == 17:
                # harm_freq, harm_mag = fe.harmonics(nfft=8192, nht=0.15, f=f, S=stft, fs=downsample_rate, fmin=50, fmax=500, threshold=0.2)
                # hsc = fe.harmonic_spectral_centroid(harm_freq, harm_mag)
                # hsd = fe.harmonic_spectral_deviation(harm_mag)
                # hss = fe.harmonic_spectral_spread(hsc, harm_freq, harm_mag)
                # hsv = fe.harmonic_spectral_variation(harm_mag)
                # feature17 = np.concatenate([hsc, hsd, hss, hsv], axis=0)
                # feature_list.append(feature17)
                harm_freq, harm_mag = timbral.harmonics(frames=frames,
                                                        fs=downsample_rate,
                                                        S=stft,
                                                        f=f,
                                                        nfft=8192,
                                                        fmin=50,
                                                        fmax=500,
                                                        nht=0.15)
                hsc = timbral.harmonic_spectral_centroid(harm_freq, harm_mag)
                hsd = timbral.harmonic_spectral_deviation(harm_mag)
                hss = timbral.harmonic_spectral_spread(hsc, harm_freq,
                                                       harm_mag)
                hsv = timbral.harmonic_spectral_variation(harm_mag)
                feature17 = np.concatenate([hsc, hsd, hss, hsv], axis=0)
                feature_list.append(feature17)
            elif i == 18:
                feature18 = fe.pitches_mag_CDSV(f=f,
                                                S=stft,
                                                fs=downsample_rate,
                                                fmin=50,
                                                fmax=downsample_rate / 2,
                                                threshold=0.2)
                feature_list.append(feature18)
            elif i == 19:
                feature19 = fe.delta_features(feature9, order=1)
                feature_list.append(feature19)
            elif i == 20:
                feature20 = fe.delta_features(feature9, order=2)
                feature_list.append(feature20)

        features = np.concatenate([j for j in feature_list],
                                  axis=0)  # 我很欣赏这一句代码,将各种特征拼在一起
        long = list(range(features.shape[1]))  # 删除含有nan的帧
        for t in long[::-1]:
            if np.isnan(features[:, t]).any():
                features = np.delete(features, t, 1)
        if average:  # 使用统计量作为特征
            mean = np.mean(features, axis=1).reshape(
                1, features.shape[0])  # 原来的特征向量是列向量,这里转成行向量
            var = np.var(features, axis=1).reshape(1, features.shape[0])
            # std = np.std(features, axis=1).reshape(1, features.shape[0])
            # ske = np.zeros((1, features.shape[0]))
            # kur = np.zeros((1, features.shape[0]))
            # for n in range(features.shape[0]):
            #     ske[0, i] = sts.skewness(features[i, :])
            #     kur[0, i] = sts.kurtosis(features[i, :])
            features = np.concatenate([
                mean, var,
                np.array([int(row[0]), int(row[1])]).reshape(1, 2)
            ],
                                      axis=1)  # 使用统计平均代替每个帧的特征
            feature_set.append(features)
        else:
            scale = StandardScaler().fit(features)
            features = scale.transform(features)  # 进行归一化
            csv_path = savefeature + '\\' + labelname[int(
                row[0])] + '\\' + row[0] + '_' + row[1] + '.csv'
            with open(csv_path, 'w', encoding='utf-8', newline='') as csvfile:
                csv_writer = csv.writer(csvfile)
                buffer = np.concatenate([
                    features.T,
                    int(row[0]) * np.ones((features.shape[1], 1)),
                    int(row[1]) * np.ones((features.shape[1], 1))
                ],
                                        axis=1)
                csv_writer.writerows(buffer)
        print('featuring:', row[0], row[1])
    datafile.close()  # 关闭文件,避免不必要的错误
    if average:  # 使用统计量作为特征
        features = np.concatenate([k for k in feature_set],
                                  axis=0)  # 我很欣赏这一句代码    行表示样本数,列表示特征数
        tmp = features[:, -2:]  # 防止归一化的时候把标签也归一化
        features = features[:, 0:-2]
        scale = StandardScaler().fit(features)
        features = scale.transform(features)  # 进行归一化
        features = np.concatenate([features, tmp], axis=1)  # 把之前分开的特征和标签拼在一起
        for k in range(features.shape[0]):
            csv_path = savefeature + '\\' + labelname[int(features[k, -2])] + \
                       '\\' + str(int(features[k, -2])) + '_' + str(int(features[k, -1])) + '.csv'
            with open(csv_path, 'w', encoding='utf-8', newline='') as csvfile:
                csv_writer = csv.writer(csvfile)  # 每个音频文件只有一个特征向量,并存入一个csv文件
                csv_writer.writerow(features[k, :])  # 注意这里写入的是一行,要用writerow
Ejemplo n.º 4
0
plt.plot(f3, np.abs(S3))
plt.tight_layout()

x, fs = lib.load(ex, sr=None, mono=True, res_type='kaiser_best')
downsample_rate = 22050

avoid_overlap = preprocessing.avoid_overlap(x,
                                            N=20,
                                            f=downsample_rate / 2,
                                            fs=fs,
                                            plot=False)
downsample = preprocessing.downsample(avoid_overlap, fs, downsample_rate)
f1, t1, S1 = fe.stft(x=x,
                     pic=None,
                     fs=fs,
                     nperseg=1500,
                     noverlap=750,
                     nfft=8192,
                     boundary=None,
                     padded=False)
f2, t2, S2 = fe.stft(x=downsample,
                     pic=None,
                     fs=downsample_rate,
                     nperseg=750,
                     noverlap=375,
                     nfft=8192,
                     boundary=None,
                     padded=False)


def stft_specgram(f, t, zxx, picname=None):
    plt.figure()
Ejemplo n.º 5
0
import matplotlib.pyplot as plt
import time
from preprocess_filter import *
import preprocessing
import feature_extraction as fe

# ex = '..\\..\\数据集2\\pre2012\\bflute\\BassFlute.ff.C5B5.aiff'
ex = '..\\..\\cello_and_viola\\viola\\Viola.arco.ff.sulA.A4.stereo.aiff'
x, fs = lib.load(ex, sr=None, mono=True, res_type='kaiser_best')
downsample_rate = 22050
pre_amphasis = preprocessing.pre_emphasis(x, 0.97, pic=None)

f1, t1, S1 = fe.stft(x=x,
                     pic=None,
                     fs=fs,
                     nperseg=1500,
                     noverlap=750,
                     nfft=8192,
                     boundary=None,
                     padded=False)
f2, t2, S2 = fe.stft(x=pre_amphasis,
                     pic=None,
                     fs=fs,
                     nperseg=1500,
                     noverlap=750,
                     nfft=8192,
                     boundary=None,
                     padded=False)

plt.figure()
ax1 = plt.subplot(211)
ax1.set_xlabel('time')
Ejemplo n.º 6
0
import feature_extraction as fe
import visualization as visual
import timbral_feature as timbral
import matplotlib.pyplot as plt

# ex = '..\\..\\suhao.wav'
# ex = '..\\..\\boy_and_girl\\class1\\arctic_a0012.wav'
# ex = '..\\..\\数据集2\\pre2012\\bflute\\BassFlute.mf.C4B4.aiff'
# ex = '..\\..\\cello_and_viola\\viola\\Viola.arco.ff.sulA.A4.stereo.aiff'
ex = '..\\..\\数据集2\\post2012\\cello\\Cello.arco.ff.sulA.A5.stereo.aiff'
data, fs = lib.load(ex, sr=None, mono=True, res_type='kaiser_best')
frame_length = int(0.03*fs)
frame_lap = int(0.015*fs)
# data = data + np.random.randn(len(data))
frames = preprocessing.frame(data, frame_length, frame_lap)
f, t, stft = fe.stft(data, pic=None, fs=fs, nperseg=frame_length,
                     noverlap=frame_length-frame_lap, nfft=8192, boundary=None, padded=False)
stft = np.abs(stft)


harm_freq, harm_mag = timbral.harmonics(frames, fs, stft, f, nfft=8192, fmin=50, fmax=500,  nht=0.15)


# 绘制谐波以及频谱
i = 20
y2 = harm_freq[i]
# y2 = y2[0: 10]
visual.picfftandpitch(f, stft[:, i], y2, title='谐波提取', xlabel='freq(Hz)', ylabel='mag', pic=None)
plt.figure()
plt.plot(frames[:, i])
plt.show()