Exemple #1
def calc_ssnr(ref_sig, sig, frame_size, mid_only=False):
    """Compute the segmental signal-to-noise ratio (SSNR) of ``sig`` in dB.

    Both signals are framed with ``sdcore.frame``; ``frame_size`` is passed as
    both the second and third argument (presumably frame length and frame
    shift, i.e. non-overlapping frames -- confirm against ``sdcore.frame``).
    The noise is the difference between a reference frame and the matching
    evaluated frame, and the per-frame value is
    ``10 * log10(ref_energy / noise_energy)``.

    :param ref_sig: clean signal used as the reference
    :param sig: speech signal to be evaluated
    :param frame_size: frame size handed to ``sdcore.frame``
    :param mid_only: if True, evaluate only the middle frame and return its
        SNR unclipped
    :return: with ``mid_only`` False, the mean over all frames of the
        per-frame SNR clipped to ``[ssnr_min, ssnr_max]``; with ``mid_only``
        True, the SNR of the middle frame
    """
    ref_frame = sdcore.frame(ref_sig, frame_size, frame_size)
    deg_frame = sdcore.frame(sig, frame_size, frame_size)

    if mid_only:
        # Keep only the middle frame so the rest is not computed needlessly.
        mid = len(ref_frame) // 2
        ref_frame = ref_frame[mid, :]
        deg_frame = deg_frame[mid, :]

    noise_frame = ref_frame - deg_frame
    # The module-level ``min_pf`` is added to both energies (presumably a small
    # floor that guards against division by zero and log(0)).
    ref_energy = np.sum(ref_frame ** 2, axis=-1) + min_pf
    noise_energy = np.sum(noise_frame ** 2, axis=-1) + min_pf
    ssnr = 10 * np.log10(ref_energy / noise_energy)

    if mid_only:
        # Clipping of the single-frame value is intentionally not applied.
        return ssnr

    # Limit every frame to [ssnr_min, ssnr_max] before averaging so that a few
    # extreme frames cannot dominate the mean.
    return np.mean(np.clip(ssnr, ssnr_min, ssnr_max))
Exemple #2
def calc_ssnr(ref_sig, deg_sig, frame_size, mid_only=False):
    """Compute the segmental signal-to-noise ratio (SSNR) of ``deg_sig`` in dB.

    Both signals are framed with ``sdcore.frame(..., center=False)``;
    ``frame_size`` is passed as both the second and third argument (presumably
    frame length and frame shift -- confirm against ``sdcore.frame``). The
    per-frame value is ``10 * log10(ref_energy / noise_energy)`` where the
    noise is the reference frame minus the degraded frame.

    :param ref_sig: reference signal
    :param deg_sig: degraded signal to be evaluated
    :param frame_size: frame size handed to ``sdcore.frame``
    :param mid_only: if True, evaluate only the middle frame and return its
        SNR unclipped
    :return: with ``mid_only`` False, the mean over all frames of the
        per-frame SNR clipped to ``[ssnr_min, ssnr_max]``; with ``mid_only``
        True, the SNR of the middle frame
    """
    ref_frame = sdcore.frame(ref_sig, frame_size, frame_size, center=False)
    deg_frame = sdcore.frame(deg_sig, frame_size, frame_size, center=False)

    if mid_only:
        # Restrict the computation to the single middle frame.
        mid = len(ref_frame) // 2
        ref_frame = ref_frame[mid, :]
        deg_frame = deg_frame[mid, :]

    noise_frame = ref_frame - deg_frame
    # ``min_pf`` (module level) is added to both energies, presumably as a
    # small floor against division by zero and log(0).
    ref_energy = np.sum(ref_frame ** 2, axis=-1) + min_pf
    noise_energy = np.sum(noise_frame ** 2, axis=-1) + min_pf
    ssnr = 10 * np.log10(ref_energy / noise_energy)

    if mid_only:
        # The single-frame value is returned without clipping.
        return ssnr

    # Clip each frame to [ssnr_min, ssnr_max], then average over frames.
    return np.mean(np.clip(ssnr, ssnr_min, ssnr_max))
Exemple #3
def compute_feat(wav, feat, samplerate, windowsize, hop_point, norm=False, center=False):
    """Compute one feature, or several concatenated features, for a waveform.

    :param wav: input waveform
    :param feat: feature type (one of the ``FEAT_*`` constants), or a list or
        tuple of feature types; for a sequence each feature is computed
        separately and the results are concatenated along axis 1
    :param samplerate: sampling rate, forwarded to the mel, PNCC and
        gammatone extractors
    :param windowsize: analysis window (frame) size
    :param hop_point: frame shift
    :param norm: if True, ``standardize`` is applied to every item along the
        first axis of the feature
    :param center: forwarded to ``core.frame``; only used by the magnitude,
        spectrum, phase and raw-waveform features
    :return: the feature as a numpy array
    :raises ValueError: if ``feat`` is not a supported feature type
    """
    if isinstance(feat, (list, tuple)):
        # Normalisation (if requested) happens inside each recursive call, so
        # the concatenated result is returned directly.
        feature = [compute_feat(wav, f, samplerate, windowsize, hop_point,
                                norm=norm, center=center) for f in feat]
        return np.concatenate(feature, axis=1)

    if feat == FEAT_MAGNITUDE:
        frames = core.frame(wav, windowsize, hop_point, center=center)
        feature = np.absolute(core.stft(frames))
    elif feat == FEAT_LOG_MEL:
        feature = mel_spec.melspectrogram(wav, samplerate, windowsize, hop_point, MEL_COEF_N)
        feature = mel_spec.power_to_db(feature)
    elif feat == FEAT_PNCC:
        feature = pncc.calc_pncc(wav, samplerate, windowsize, hop_point)
    elif feat == FEAT_GF:
        feature = gammatone.gammatonegram(wav, samplerate, windowsize,
                                          hop_point, GAMMATONE_FILTER_N)
    elif feat == FEAT_GFCC:
        feature = gammatone.gammatonegram(wav, samplerate, windowsize,
                                          hop_point, GAMMATONE_FILTER_N)
        feature = gammatone.gtm2gfcc(feature, dct_stop=31)
    elif feat == FEAT_SPECTRUM:
        # Complex STFT, kept as-is.
        frames = core.frame(wav, windowsize, hop_point, center=center)
        feature = core.stft(frames)
    elif feat == FEAT_PHASE:
        frames = core.frame(wav, windowsize, hop_point, center=center)
        feature = np.angle(core.stft(frames))
    elif feat == FEAT_WAV:
        # Raw framed waveform.
        feature = core.frame(wav, windowsize, hop_point, center=center)
    else:
        # Reject unknown feature types explicitly instead of falling through
        # with ``feature`` unbound.
        raise ValueError("feature %s not support" % feat)

    if norm:
        feature = [standardize(f) for f in feature]

    return np.array(feature)
Exemple #4
def spectrogram(y=None, frames_size=410, frame_shift=160, power=2):
    """Compute a spectrogram of ``y`` raised to ``power``.

    The signal is framed with ``core.frame``, transformed with ``core.stft``,
    and the magnitude of the result is transposed and raised element-wise to
    ``power``.

    :param y: input waveform, passed to ``core.frame`` as ``wav``
    :param frames_size: frame size passed to ``core.frame``
    :param frame_shift: frame shift passed to ``core.frame``
    :param power: exponent applied to the magnitude (2 yields a power
        spectrogram, 1 a magnitude spectrogram)
    :return: the transposed magnitude STFT raised to ``power``
    """
    frames = core.frame(wav=y, frame_size=frames_size, frame_shift=frame_shift)
    # Magnitude of the STFT, transposed relative to what ``core.stft`` returns.
    magnitude = np.abs(core.stft(frames=frames))
    return magnitude.T ** power
Exemple #5
def gammatonegram(X, samplerate=16000, frame_size=410, frame_shift=160, nfilter=64,
                  low_freq=100, high_freq=None, fft_proc=True, width=1):
    """Compute a gammatone-filterbank spectrogram of ``X``.

    Two methods are available:

    * ``fft_proc=True``: the magnitude STFT of ``X`` is multiplied by the
      weighting matrix from ``fft2gammatonemx`` and divided by the FFT size.
    * ``fft_proc=False``: ``X`` is passed through the ERB filterbank
      (``make_erb_filters`` / ``pass_erb_filterbank``) and the RMS of each
      channel is taken over every analysis frame.

    :param X: input signal
    :param samplerate: sampling rate of ``X``
    :param frame_size: analysis frame length in samples; also used as the FFT
        size in the FFT method
    :param frame_shift: hop between consecutive frames in samples
    :param nfilter: number of gammatone channels
    :param low_freq: lowest filter frequency
    :param high_freq: highest frequency for the FFT method; defaults to
        ``samplerate // 2``
    :param fft_proc: select the FFT-based method (True) or the time-domain
        filterbank (False)
    :param width: filter width parameter forwarded to ``fft2gammatonemx``
    :return: the gammatonegram transposed so that the channel axis is last
    """
    if high_freq is None:
        high_freq = samplerate // 2

    if fft_proc:
        nfft = frame_size
        gtm = fft2gammatonemx(nfft, samplerate, nfilter, low_freq, width, high_freq, nfft // 2 + 1)

        frame = core.frame(X, frame_size, frame_shift)
        FFTX = core.stft(frame).T
        Y = np.dot(gtm, np.abs(FFTX)) / nfft
    else:
        fcoefs = np.flipud(make_erb_filters(samplerate, nfilter, low_freq))
        XE = pass_erb_filterbank(X, fcoefs) ** 2
        # A signal shorter than one frame yields zero frames, not an error.
        num_frame = max(0, 1 + (XE.shape[1] - frame_size) // frame_shift)
        Y = np.zeros((nfilter, num_frame))
        for i in range(num_frame):
            # Frame i starts at i * frame_shift; the frame count above is
            # derived from that hop, so the window must advance by it too.
            start = i * frame_shift
            Y[:, i] = np.sqrt(np.mean(XE[:, start:start + frame_size], axis=1))
    return Y.T
Exemple #6
def calc_pncc(sig, sr=_SAMPLE_RATE, frame_size=410, frame_shift=160, power_law=True):
    """Compute Power-Normalized Cepstral Coefficients (PNCC).

    Processing chain: optional pre-emphasis, STFT magnitude, gammatone
    filterbank power, peak-power normalisation, medium-duration power bias
    subtraction with weight smoothing across channels, nonlinearity, DCT and
    cepstral mean normalisation.

    :param sig: raw time-domain signal (not modified)
    :param sr: sampling rate; it is only forwarded to the gammatone filter
        design -- no resampling is performed here
    :param frame_size: samples per frame (the default 410 is 25.6 ms at 16 kHz)
    :param frame_shift: hop between frames in samples (the default 160 is
        10 ms at 16 kHz)
    :param power_law: if True (default) apply the power-law nonlinearity
        ``P ** _POWER_COEFF``; otherwise use ``log(P + _EPS)``
    :return: PNCC features with one row per frame
    :raises ValueError: if the signal yields no frame
    """
    # NOTE(review): the original selector between the power-law and the log
    # nonlinearity was not recoverable from the source; it is exposed here as
    # the backward-compatible ``power_law`` keyword -- confirm the default.
    sig = np.asarray(sig)

    if _PRE_EMPHSIS:
        # Pre-emphasis H(z) = 1 - 0.97 z^-1. It is applied before framing so
        # that it actually shapes the spectrum, and it builds a new array so
        # the caller's buffer is left untouched.
        sig = np.concatenate((sig[:1], sig[1:] - 0.97 * sig[:-1]))

    # STFT magnitude, keeping the first _FFT_SIZE // 2 bins of each frame.
    frames = basic.frame(sig, frame_size, frame_shift)
    frames = basic.stft(frames, n_fft=_FFT_SIZE, window='hamming', half=False)
    frames = np.abs(frames[:, :_FFT_SIZE // 2])

    num_frame = len(frames)
    if num_frame == 0:
        raise ValueError("signal is too short to produce any frame")

    # Gammatone filter magnitude responses.
    aad_H = _calc_gammatone_filter_response(_FILTER_NUM, _FFT_SIZE, sr)
    aad_H = np.abs(_normalize_gain(aad_H))

    # Short-time power per channel, arranged as (channel, frame).
    aad_HT = aad_H.T
    aad_P = np.array([np.sum((aad_HT * frame) ** 2, axis=1) for frame in frames]).T

    # Peak power normalisation using the 95th percentile of the per-frame
    # total power (sorted so that the index really selects a percentile).
    ad_sum_P = np.sort(np.sum(aad_P, axis=0))
    max_p = ad_sum_P[int(round(0.95 * num_frame)) - 1]
    aad_P = aad_P / max_p * _NORM_POWER

    # NOTE(review): in the source everything below was nested one level
    # deeper under a statement that is no longer visible; it is run
    # unconditionally here -- confirm no guard or context manager was lost.

    # Medium-duration power: mean over a window of +/- _SMTH_FRM frames.
    aad_Q = np.empty_like(aad_P)
    for j in range(num_frame):
        lo = max(0, j - _SMTH_FRM)
        hi = min(num_frame, j + _SMTH_FRM + 1)
        aad_Q[:, j] = np.mean(aad_P[:, lo:hi], axis=1)

    # Per-channel weights from the power bias subtraction.
    aad_w = []
    for i in range(_FILTER_NUM):
        aad_tildeQ = _power_bias_sub(aad_Q[i, :], _DELTA)
        aad_w.append(_max(aad_tildeQ, _EPS) / _max(aad_Q[i, :], _EPS))
    aad_w = np.array(aad_w)

    # Weight smoothing across channels: mean over +/- _SMTH_FLT neighbours.
    aad_w_Smooth = np.zeros(aad_Q.shape)
    for i in range(_FILTER_NUM):
        lo = max(i - _SMTH_FLT, 0)
        hi = min(i + _SMTH_FLT + 1, _FILTER_NUM)
        aad_w_Smooth[i, :] = np.mean(aad_w[lo:hi, :], axis=0)

    aad_P = aad_P * aad_w_Smooth

    # Apply the nonlinearity.
    if power_law:
        aadSpec = aad_P ** _POWER_COEFF
    else:
        aadSpec = np.log(aad_P + _EPS)

    # DCT along the channel axis, keeping the first DCT_NUM coefficients.
    aadDCT = dct(aadSpec, norm='ortho', axis=0)[:DCT_NUM, :]

    # Cepstral mean normalisation: remove each coefficient's mean over time.
    aadDCT = aadDCT - np.mean(aadDCT, axis=1, keepdims=True)

    return aadDCT.T