Ejemplo n.º 1
0
def combine_with_mfcc(xdir, neighbor=2, nfft=256, normal_flag=0):
    """
    Build one feature matrix from a wav file: context-windowed log spectrum
    followed by the output of combine_mfcc_d_dd(mfcc(...)).

    The spectrum part expands every frame (row) of the transposed log STFT
    into the concatenation of its `neighbor` previous frames, itself and its
    `neighbor` following frames; the first / last frame is repeated where a
    neighbor would fall outside the signal. The MFCC-derived features are then
    appended column-wise, after their row count has been aligned with the
    number of spectrum frames.

    :param xdir: path of the wav file to read
    :param neighbor: number of context frames taken on each side of a frame
    :param nfft: nfft value forwarded to stft (the mfcc call always uses nfft=256)
    :param normal_flag: 0 keeps the log spectrum as is; any other value passes
                        it through normalize_mean
    :return: 2-D array with one row per spectrum frame; the first
             (2 * neighbor + 1) * n columns hold the windowed spectrum, the
             remaining columns hold the MFCC-derived features
    """
    _, x = wavfile.read(xdir)
    # `freq` is defined outside this function -- presumably the sample rate.
    _, _, Zxx = stft(x, freq, nfft=nfft)
    # The 1e-7 offset presumably keeps log() finite for all-zero bins.
    Zxx = log((abss(Zxx)).T + 1e-7)
    if normal_flag != 0:
        Zxx = normalize_mean(Zxx)
    m, n = Zxx.shape
    tmp = zer(m * n * (neighbor * 2 + 1), dtype='float32').reshape(m, -1)
    for i in range(2 * neighbor + 1):
        if i <= neighbor:
            # Column block i holds the frame `shift` steps earlier.
            shift = neighbor - i
            tmp[shift:m, i * n: (i + 1) * n] = Zxx[:m - shift]
            # The first `shift` rows have no earlier frame: repeat frame 0.
            for j in range(shift):
                tmp[j, i * n: (i + 1) * n] = Zxx[0, :]
        else:
            # Column block i holds the frame `shift` steps later.
            shift = i - neighbor
            tmp[:m - shift, i * n: (i + 1) * n] = Zxx[shift:m]
            # The last `shift` rows have no later frame: repeat the last frame.
            for j in range(shift):
                tmp[m - (j + 1), i * n: (i + 1) * n] = Zxx[m - 1, :]
    # now tmp is "make_window_buffer" output
    # then calc mfcc & d & dd
    mfcc_data = combine_mfcc_d_dd(mfcc(x, freq, winlen=0.016, winstep=0.008, nfft=256, winfunc=np.bartlett))
    # Align the MFCC row count with the spectrum frame count in one step.
    # The previous retry loop appended one zero row per failed concatenate and
    # could never finish when the MFCC matrix had MORE rows than the spectrum.
    missing = m - mfcc_data.shape[0]
    if missing > 0:
        padding = np.zeros([missing, mfcc_data.shape[1]])
        mfcc_data = np.concatenate((mfcc_data, padding), axis=0)
    elif missing < 0:
        # Surplus MFCC rows have no matching spectrum frame; drop them.
        mfcc_data = mfcc_data[:m]
    return np.concatenate((tmp, mfcc_data), axis=1)
Ejemplo n.º 2
0
def get_Zyy(ydir, nfft=256, normal_flag=0):
    """
    Read the wav file at `ydir` and return the transposed log of its STFT.

    Usually used for training and testing, not in practical application.

    :param ydir: path of the wav file to read
    :param nfft: nfft value forwarded to stft
    :param normal_flag: 0 returns the log spectrum unchanged; any other value
                        returns it after normalize_mean
    :return: log(abss(STFT).T + 1e-7), optionally passed through normalize_mean
    """
    _, samples = wavfile.read(ydir)
    # `freq` is defined outside this function -- presumably the sample rate.
    _, _, spectrum = stft(samples, freq, nfft=nfft)
    # The 1e-7 offset presumably keeps log() finite for all-zero bins.
    log_spectrum = log((abss(spectrum)).T + 1e-7)
    if normal_flag == 0:
        return log_spectrum
    return normalize_mean(log_spectrum)
Ejemplo n.º 3
0
def combine_with_mfcc_Zyy(ydir, nfft=256, normal_flag=0):
    """
    Build one matrix from a wav file: transposed log STFT followed by the
    output of combine_mfcc_d_dd(mfcc(...)).

    The MFCC-derived features are appended column-wise after their row count
    has been aligned with the number of spectrum frames.

    :param ydir: path of the wav file to read
    :param nfft: nfft value forwarded to stft (the mfcc call always uses nfft=256)
    :param normal_flag: 0 keeps the log spectrum as is; any other value passes
                        it through normalize_mean
    :return: 2-D array with one row per spectrum frame: spectrum columns first,
             MFCC-derived columns last
    """
    _, y = wavfile.read(ydir)
    # `freq` is defined outside this function -- presumably the sample rate.
    _, _, Zyy = stft(y, freq, nfft=nfft)
    # The 1e-7 offset presumably keeps log() finite for all-zero bins.
    y_data = log((abss(Zyy)).T + 1e-7)
    if normal_flag != 0:
        y_data = normalize_mean(y_data)
    mfcc_data = combine_mfcc_d_dd(mfcc(y, freq, winlen=0.016, winstep=0.008, nfft=256, winfunc=np.bartlett))
    # Align the MFCC row count with the spectrum frame count in one step.
    # The previous retry loop appended one zero row per failed concatenate and
    # could never finish when the MFCC matrix had MORE rows than the spectrum.
    frame_count = y_data.shape[0]
    missing = frame_count - mfcc_data.shape[0]
    if missing > 0:
        padding = np.zeros([missing, mfcc_data.shape[1]])
        mfcc_data = np.concatenate((mfcc_data, padding), axis=0)
    elif missing < 0:
        # Surplus MFCC rows have no matching spectrum frame; drop them.
        mfcc_data = mfcc_data[:frame_count]
    return np.concatenate((y_data, mfcc_data), axis=1)
Ejemplo n.º 4
0
def make_window_buffer(xdir, neighbor=2, nfft=256, normal_flag=0):
    """
    Build the frame groups used as DNN input.

    Every row (frame) of the transposed log spectrum is expanded into the
    concatenation of its `neighbor` previous rows, the row itself and its
    `neighbor` following rows.

    Example:
    given an array like this:
                 [[1, 1, 1],
                  [2, 2, 2],
                  [3, 3, 3],
                  [4, 4, 4],
                  [5, 5, 5],
                  [6, 6, 6],
                  [7, 7, 7]]
    combining 1 neighbor gives:
        [[1, 1, 1, 1, 1, 1, 2, 2, 2],
         [1, 1, 1, 2, 2, 2, 3, 3, 3],
         [2, 2, 2, 3, 3, 3, 4, 4, 4],
         [3, 3, 3, 4, 4, 4, 5, 5, 5],
         [4, 4, 4, 5, 5, 5, 6, 6, 6],
         [5, 5, 5, 6, 6, 6, 7, 7, 7],
         [6, 6, 6, 7, 7, 7, 7, 7, 7]]
        (neighbor)↑,↑,↑ (neighbor)
    The marked middle columns are the original array. At the start and the end
    the first / last row is repeated to fill the missing neighbors.

    :param xdir: path of the wav file to read
    :param neighbor: number of neighboring rows taken on each side
    :param nfft: nfft value forwarded to stft
    :param normal_flag: 0 keeps the log spectrum as is; any other value passes
                        it through normalize_mean
    :return: float32 array with m rows and n * (2 * neighbor + 1) columns,
             where (m, n) is the shape of the transposed log spectrum
    """
    _, samples = wavfile.read(xdir)
    # `freq` is defined outside this function -- presumably the sample rate.
    _, _, spectrum = stft(samples, freq, nfft=nfft)
    # The 1e-7 offset presumably keeps log() finite for all-zero bins.
    frames = log((abss(spectrum)).T + 1e-7)
    if normal_flag != 0:
        frames = normalize_mean(frames)
    m, n = frames.shape
    width = 2 * neighbor + 1
    tmp = zer(m * n * width, dtype='float32').reshape(m, -1)
    for block in range(width):
        cols = slice(block * n, (block + 1) * n)
        # offset < 0: earlier frame, 0: the frame itself, > 0: later frame
        offset = block - neighbor
        if offset <= 0:
            lag = -offset
            tmp[lag:m, cols] = frames[:m - lag]
            # Rows without an earlier frame repeat the first frame.
            for row in range(lag):
                tmp[row, cols] = frames[0, :]
        else:
            tmp[:m - offset, cols] = frames[offset:m]
            # Rows without a later frame repeat the last frame.
            for row in range(m - 1, m - 1 - offset, -1):
                tmp[row, cols] = frames[m - 1, :]
    return tmp
Ejemplo n.º 5
0
def test_GRU(model, input_path, output_path):
    """
    Run `model` on the wav file at `input_path` and write the reconstructed
    signal to `output_path` as 16-bit samples.

    The model input comes from pack_GRU(input_path), reshaped to
    [1, -1, 22]; the prediction is passed through unpack_GRU, multiplied with
    the log spectrum of the input, exponentiated, combined with the phase
    term exp(1j * ang(Zxx)) of the input STFT and inverted with istft.

    :param model: object exposing predict(); called with a [1, -1, 22] array
    :param input_path: path of the wav file to process
    :param output_path: path of the wav file to write
    :return: None
    """
    _, s = wavfile.read(input_path)
    # `freq` is defined outside this function -- presumably the sample rate.
    _, _, Zxx = stft(s, freq)
    Zxx1 = log((abss(Zxx)).T + 1e-7)
    print(Zxx1.shape)
    yt = pack_GRU(input_path)
    y = model.predict(np.reshape(yt, [1, -1, 22]))
    y = unpack_GRU(y)
    print(y.shape)
    ypreComplex = exp(y.T * Zxx1.T) * exp(complex(0, 1) * ang(Zxx))
    _, xrec = istft(ypreComplex, freq)
    # Saturate before the cast: a bare astype(int16) does not clamp, so
    # samples outside the int16 range would wrap around into loud artifacts.
    int16_range = np.iinfo(np.int16)
    dataWrite = np.clip(xrec, int16_range.min, int16_range.max).astype(np.int16)
    wavfile.write(output_path, freq, dataWrite)
Ejemplo n.º 6
0
def test_model_mfcc(model, input_path, output_path, neighbor, nffts, normal_flag=0):
    """
    Run `model` on the spectrum + MFCC features of the wav file at
    `input_path` and write the reconstructed signal to `output_path` as
    16-bit samples.

    The model input is combine_with_mfcc(input_path, ...). The last 39 columns
    of the prediction (the MFCC part) are dropped, the rest is treated as a
    log spectrum, exponentiated, combined with the phase term
    exp(1j * ang(Zxx)) of the input STFT and inverted with istft.

    :param model: object exposing predict()
    :param input_path: path of the wav file to process
    :param output_path: path of the wav file to write
    :param neighbor: context size forwarded to combine_with_mfcc
    :param nffts: nfft value forwarded to combine_with_mfcc
    :param normal_flag: 0 uses the prediction directly; any other value first
                        passes it through unnormalize(y, Zxx1)
    :return: None
    """
    _, s = wavfile.read(input_path)
    # `freq` is defined outside this function -- presumably the sample rate.
    # NOTE(review): this reference STFT uses stft's default nfft while the
    # model input uses `nffts`; the shapes presumably only line up when both
    # agree -- confirm against callers.
    _, _, Zxx = stft(s, freq)
    Zxx1 = log((abss(Zxx)).T+1e-7)
    y_input = combine_with_mfcc(input_path, neighbor=neighbor, nfft=nffts, normal_flag=normal_flag)
    y = model.predict(y_input)
    # NOTE(review): 39 is hard-coded as the width of the MFCC part -- confirm
    # it matches the output width of combine_mfcc_d_dd.
    y = (np.delete(y, np.s_[-39:], axis=1)).T   # delete mfcc data
    phase = exp(complex(0, 1) * ang(Zxx))
    # Un-normalization is applied to the log spectrum, before exp(); an
    # earlier variant that un-normalized after exp() was marked as wrong.
    log_spectrum = y if normal_flag == 0 else unnormalize(y, Zxx1)
    ypreComplex = exp(log_spectrum) * phase
    _, xrec = istft(ypreComplex, freq)
    # Saturate before the cast: a bare astype(int16) does not clamp, so
    # samples outside the int16 range would wrap around into loud artifacts.
    int16_range = np.iinfo(np.int16)
    dataWrite = np.clip(xrec, int16_range.min, int16_range.max).astype(np.int16)
    wavfile.write(output_path, freq, dataWrite)
Ejemplo n.º 7
0
def test_model(model, input_path, output_path, neighbor, nffts, normal_flag=0):
    """
    Run `model` on the windowed spectrum of the wav file at `input_path` and
    write the reconstructed signal to `output_path` as 16-bit samples.

    The model input is make_window_buffer(input_path, ...). The transposed
    prediction is treated as a log spectrum, exponentiated, combined with the
    phase term exp(1j * ang(Zxx)) of the input STFT and inverted with istft.

    :param model: object exposing predict()
    :param input_path: path of the wav file to process
    :param output_path: path of the wav file to write
    :param neighbor: context size forwarded to make_window_buffer
    :param nffts: nfft value forwarded to make_window_buffer
    :param normal_flag: 0 uses the prediction directly; any other value first
                        passes it through unnormalize(y, Zxx1)
    :return: None
    """
    _, s = wavfile.read(input_path)
    # `freq` is defined outside this function -- presumably the sample rate.
    # NOTE(review): this reference STFT uses stft's default nfft while the
    # model input uses `nffts`; the shapes presumably only line up when both
    # agree -- confirm against callers.
    _, _, Zxx = stft(s, freq)
    Zxx1 = log((abss(Zxx)).T+1e-7)
    y_input = make_window_buffer(input_path, neighbor=neighbor, nfft=nffts, normal_flag=normal_flag)
    y = model.predict(y_input).T
    phase = exp(complex(0, 1) * ang(Zxx))
    # Un-normalization is applied to the log spectrum, before exp(); an
    # earlier variant that un-normalized after exp() was marked as wrong.
    log_spectrum = y if normal_flag == 0 else unnormalize(y, Zxx1)
    ypreComplex = exp(log_spectrum) * phase
    _, xrec = istft(ypreComplex, freq)
    # Saturate before the cast: a bare astype(int16) does not clamp, so
    # samples outside the int16 range would wrap around into loud artifacts.
    int16_range = np.iinfo(np.int16)
    dataWrite = np.clip(xrec, int16_range.min, int16_range.max).astype(np.int16)
    wavfile.write(output_path, freq, dataWrite)
Ejemplo n.º 8
0
def pack_GRU(xdir):
    """
    Read the wav file at `xdir` and convert its log spectrum into the
    features returned by the fr2bark -> bark_rescale -> bark_dct chain.

    The STFT is used untransposed here.

    :param xdir: path of the wav file to read
    :return: bark_dct(bark_rescale(fr2bark(log(abss(STFT) + 1e-7))))
    """
    _, samples = wavfile.read(xdir)
    # `freq` is defined outside this function -- presumably the sample rate.
    _, _, spectrum = stft(samples, freq)
    # The 1e-7 offset presumably keeps log() finite for all-zero bins.
    log_spectrum = log((abss(spectrum)) + 1e-7)
    bark_bands = fr2bark(log_spectrum)
    rescaled = bark_rescale(bark_bands)
    return bark_dct(rescaled)