Ejemplo n.º 1
0
def get_mfcc(rate, sig):
    """Return standardised MFCC features computed on a shelf-filtered signal.

    The signal is analysed once (liftered log filterbank energies) to find its
    most energetic frame; that frame drives the corner frequencies of a
    low-shelf / high-shelf / limiter chain, and MFCCs are then computed on the
    filtered audio.

    :param rate: sample rate of ``sig`` in Hz.
    :param sig: audio samples accepted by ``mfcc.logfbank`` and by the effects chain.
    :returns: the frames at index 1 to 4 of the standardised MFCC matrix.
    """
    # Liftered log filterbank energies; only used to pick the strongest frame.
    # The sample rate is passed explicitly so the analysis does not silently
    # fall back to the library default.
    features = mfcc.lifter(mfcc.logfbank(sig, rate))

    # Squaring makes positive and negative values comparable by magnitude;
    # argmax returns the first frame on ties, like list.index(max(...)).
    frame_energy = np.sum(np.square(features), axis=1)
    strongest_frame = int(np.argmax(frame_energy))
    hz = mfcc.mel2hz(features[strongest_frame])

    min_hz = min(hz)

    # NOTE(review): the sign flip presumably relies on min_hz being negative
    # (log energies) so that the shelf frequencies come out positive - confirm.
    speech_booster = (
        AudioEffectsChain()
        .lowshelf(frequency=min_hz * (-1), gain=12.0, slope=0.5)
        .highshelf(frequency=min_hz * (-1) * 1.2, gain=-12.0, slope=0.5)
        .limiter(gain=8.0)
    )
    y_speech_boosted = speech_booster(sig)

    features = mfcc.mfcc(y_speech_boosted, rate, 0.025, 0.01, 16,
                         nfilt=40, nfft=512, appendEnergy=False,
                         winfunc=np.hamming)

    # Standardise the features (this does not bound them to [0, 1]).
    features = preprocessing.scale(features)

    return features[1:5, :]
Ejemplo n.º 2
0
def get_MFCC(sr, audio):
    """Return standardised MFCC features computed on a low-shelf-filtered signal.

    The audio is analysed once (liftered log filterbank energies) to find its
    most energetic frame; that frame sets the corner frequency of a low-shelf
    filter, and MFCCs are then computed on the filtered audio.

    :param sr: sample rate of ``audio`` in Hz.
    :param audio: audio samples accepted by ``mfcc.logfbank`` and by the effects chain.
    :returns: the standardised MFCC matrix for the filtered audio.
    """
    #############################
    #                           #
    #      Noise Removal        #
    #                           #
    #############################

    # Log filterbank energies of the raw audio, then liftering. The sample
    # rate is passed explicitly so the analysis does not silently fall back to
    # the library default.
    features = mfcc.lifter(mfcc.logfbank(audio, sr))

    # Values can be positive or negative, so squaring lets us compare
    # magnitudes; argmax returns the first frame on ties.
    frame_energy = np.sum(np.square(features), axis=1)
    strongest_frame = int(np.argmax(frame_energy))

    # Pass the strongest frame through the mel -> Hz conversion.
    hz = mfcc.mel2hz(features[strongest_frame])
    min_hz = min(hz)

    # Low-shelf filter with +20 gain, applied to the original audio.
    # NOTE(review): the sign flip presumably relies on min_hz being negative
    # so that the shelf frequency comes out positive - confirm.
    speech_booster = AudioEffectsChain().lowshelf(
        frequency=min_hz * (-1), gain=20.0, slope=0.5)
    y_speech_boosted = speech_booster(audio)

    #############################
    #                           #
    #  FINAL MFCC CALCULATION   #
    #                           #
    #############################

    features = mfcc.mfcc(y_speech_boosted,
                         sr,
                         0.025,
                         0.01,
                         16,
                         nfilt=40,
                         nfft=512,
                         appendEnergy=False,
                         winfunc=np.hamming)

    # Standardise the features (this does not bound them to [0, 1]).
    features = preprocessing.scale(features)

    return features
Ejemplo n.º 3
0
    def __init__(self, input_dim, sr, num_filter, exp=False, filter_fix=False):
        """Create the frequency grid and the per-filter lower edge / bandwidth.

        :param input_dim: number of points on the input frequency axis.
        :param sr: sample rate; the frequency axis spans 0 .. sr / 2.
        :param num_filter: number of filters in the bank.
        :param exp: stored on the instance as ``self.exp``; not used here.
        :param filter_fix: when True the filter parameters are frozen
            (``requires_grad=False``).
        """
        super(fBPLayer, self).__init__()
        self.input_dim = input_dim
        self.num_filter = num_filter
        self.sr = sr
        self.exp = exp
        self.filter_fix = filter_fix

        trainable = not filter_fix
        column_shape = (num_filter, 1)

        # Fixed frequency axis, repeated once per filter.
        freq_axis = torch.from_numpy(np.linspace(0, self.sr / 2, input_dim))
        self.input_freq = nn.Parameter(
            freq_axis.expand(num_filter, input_dim).float(),
            requires_grad=False)

        # num_filter + 2 edges, evenly spaced between 0 and hz2mel(sr / 2),
        # then mapped through mel2hz.
        mel_edges = np.linspace(0, hz2mel(sr / 2), num_filter + 2)
        hz_edges = mel2hz(mel_edges)

        # Filter i starts at edge i ...
        lower_edges = torch.from_numpy(hz_edges[:-2]).float().reshape(column_shape)
        self.bandwidth_low = nn.Parameter(lower_edges, requires_grad=trainable)

        # ... and spans up to edge i + 2.
        widths = torch.from_numpy(hz_edges[2:] - hz_edges[:-2]).float().reshape(column_shape)
        self.bandwidth = nn.Parameter(widths, requires_grad=trainable)
Ejemplo n.º 4
0
    def __init__(self, input_dim, sr, num_filter, exp=False, filter_fix=False):
        """Create the frequency grid, filter centres and left/right bandwidths.

        :param input_dim: number of points on the input frequency axis.
        :param sr: sample rate; the frequency axis spans 0 .. sr / 2.
        :param num_filter: number of filters in the bank.
        :param exp: stored on the instance as ``self.exp``; not used here.
        :param filter_fix: when True the filter parameters are frozen
            (``requires_grad=False``).
        """
        super(fBLayer, self).__init__()
        self.input_dim = input_dim
        self.num_filter = num_filter
        self.sr = sr
        self.exp = exp
        self.filter_fix = filter_fix

        trainable = not filter_fix
        column_shape = (num_filter, 1)

        # Fixed frequency axis, repeated once per filter.
        freq_axis = torch.from_numpy(np.linspace(0, self.sr / 2, input_dim))
        self.input_freq = nn.Parameter(
            freq_axis.expand(num_filter, input_dim).float(),
            requires_grad=False)

        # num_filter + 2 points, evenly spaced between 0 and hz2mel(sr / 2),
        # then mapped through mel2hz; the inner points are the filter centres.
        mel_points = np.linspace(0, hz2mel(sr / 2), num_filter + 2)
        hz_points = mel2hz(mel_points)
        gaps = np.diff(hz_points)

        inner_centers = torch.from_numpy(hz_points[1:-1]).float().reshape(column_shape)
        self.frequency_center = nn.Parameter(inner_centers, requires_grad=trainable)

        # Distance from each centre to its left and right neighbour.
        left_gaps = torch.from_numpy(gaps[:-1]).float().reshape(column_shape)
        self.bandwidth_left = nn.Parameter(left_gaps, requires_grad=trainable)

        right_gaps = torch.from_numpy(gaps[1:]).float().reshape(column_shape)
        self.bandwidth_right = nn.Parameter(right_gaps, requires_grad=trainable)
Ejemplo n.º 5
0
def _read_pickle_file(path):
    """Unpickle and return the object stored at *path*."""
    # NOTE: unpickling can execute arbitrary code; only use this on
    # extraction files produced by this project.
    with open(path, 'rb') as f:
        return pickle.load(f)


def _collect_time_samples(bin_files, max_samples):
    """Return up to *max_samples* ``(data, grad)`` pairs read from *bin_files*."""
    samples = []
    for path in bin_files:
        for data, grad, _uid in tqdm(_read_pickle_file(str(path))):
            samples.append((data, grad))
            # Stop across ALL files once the cap is reached, not only for the
            # file currently being read.
            if len(samples) >= max_samples:
                return samples
    return samples


def _utterance_freq_stats(bin_files, feat_dim, use_abs):
    """Average per-utterance statistics over ``(data, grad, uid)`` records.

    Returns ``(data_mean, grad_mean, grad_var)``, each of length *feat_dim*.
    With ``use_abs`` the two means are taken over absolute values; the
    variance is always taken over the raw gradient.
    """
    data_mean = np.zeros(feat_dim)
    grad_mean = np.zeros(feat_dim)
    grad_var = np.zeros(feat_dim)

    num_utt = 0
    for path in bin_files:
        for data, grad, _uid in tqdm(_read_pickle_file(str(path))):
            if use_abs:
                data_mean += np.mean(np.abs(data), axis=0)
                grad_mean += np.mean(np.abs(grad), axis=0)
            else:
                data_mean += np.mean(data, axis=0)
                grad_mean += np.mean(grad, axis=0)
            grad_var += np.var(grad, axis=0)
            num_utt += 1

    # An empty split keeps its zero vectors instead of dividing by zero.
    if num_utt > 0:
        data_mean /= num_utt
        grad_mean /= num_utt
        grad_var /= num_utt
    return data_mean, grad_mean, grad_var


def _pair_freq_stats(bin_files, feat_dim):
    """Average statistics over ``(label, grad_a, grad_b, data_a, data_b)`` records.

    Both utterances of a pair contribute with equal weight. Returns
    ``(data_mean, grad_mean, grad_var)``, each of length *feat_dim*.
    """
    data_mean = np.zeros(feat_dim)
    grad_mean = np.zeros(feat_dim)
    grad_var = np.zeros(feat_dim)

    num_utt = 0
    for path in bin_files:
        records = tqdm(_read_pickle_file(str(path)))
        for _label, grad_a, grad_b, data_a, data_b in records:
            data_mean += (np.mean(data_a, axis=0) +
                          np.mean(data_b, axis=0)) / 2
            grad_mean += (np.mean(np.abs(grad_a), axis=0) +
                          np.mean(np.abs(grad_b), axis=0)) / 2
            grad_var += (np.var(grad_a, axis=0) +
                         np.var(grad_b, axis=0)) / 2
            num_utt += 1

    if num_utt > 0:
        data_mean /= num_utt
        grad_mean /= num_utt
        grad_var /= num_utt
    return data_mean, grad_mean, grad_var


def _extract_freq_time_data(dir_path):
    """Compute ``(freq_data, time_data)`` from the ``*.bin`` files and cache them."""
    train_lst = list(dir_path.glob('*train*bin'))
    veri_lst = list(dir_path.glob('*ver*bin'))
    valid_lst = list(dir_path.glob('*valid*bin'))
    test_lst = list(dir_path.glob('*test*bin'))

    print(' Train set extracting:')
    time_data = _collect_time_samples(train_lst, args.samples)
    with open(args.extract_path + '/time.data.pickle', 'wb') as f:
        pickle.dump(time_data, f, protocol=pickle.HIGHEST_PROTOCOL)

    freq_data = {}

    # NOTE(review): the train split averages signed values while the valid
    # split averages absolute values; kept as in the original - confirm intent.
    data_mean, grad_mean, grad_var = _utterance_freq_stats(
        train_lst, args.feat_dim, use_abs=False)
    freq_data['train.time.mean'] = grad_mean
    freq_data['train.time.var'] = grad_var
    freq_data['train.data.mean'] = data_mean

    print(' Valid set extracting:')
    data_mean, grad_mean, grad_var = _utterance_freq_stats(
        valid_lst, args.feat_dim, use_abs=True)
    freq_data['valid.time.mean'] = grad_mean
    freq_data['valid.time.var'] = grad_var
    freq_data['valid.data.mean'] = data_mean

    print(' Train verification set extracting:')
    data_mean, grad_mean, grad_var = _pair_freq_stats(veri_lst, args.feat_dim)
    freq_data['train.veri.time.mean'] = grad_mean
    freq_data['train.veri.time.var'] = grad_var
    freq_data['train.veri.data.mean'] = data_mean

    print(' Test set extracting:')
    data_mean, grad_mean, grad_var = _pair_freq_stats(test_lst, args.feat_dim)
    freq_data['test.veri.time.mean'] = grad_mean
    freq_data['test.veri.time.var'] = grad_var
    freq_data['test.veri.data.mean'] = data_mean

    print('Saving inputs in %s' % args.extract_path)
    with open(args.extract_path + '/freq.data.pickle', 'wb') as f:
        pickle.dump(freq_data, f, protocol=pickle.HIGHEST_PROTOCOL)

    return freq_data, time_data


def main():
    """Plot gradient and input statistics for the extracted data.

    Reads the module-level ``args`` (``extract_path``, ``samples``,
    ``feat_dim``, ``acoustic_feature``). Cached statistics are loaded from
    ``freq.data.pickle`` / ``time.data.pickle`` when possible; otherwise they
    are recomputed from the ``*.bin`` files and the caches are rewritten.
    Writes ``grad.veri.time.mean.pdf``, ``train.grad.npy``,
    ``inputs.freq.png`` and ``inputs.time.png`` into ``args.extract_path``.
    """
    dir_path = pathlib.Path(args.extract_path)
    print('Path is %s' % str(dir_path))

    try:
        freq_data = _read_pickle_file(args.extract_path + '/freq.data.pickle')
        time_data = _read_pickle_file(args.extract_path + '/time.data.pickle')
    except Exception as err:
        # Best effort: any load failure triggers a rebuild, but Ctrl-C and
        # SystemExit are no longer swallowed and the reason is reported.
        print('Cached statistics not usable (%r); extracting again.' % (err,))
        freq_data, time_data = _extract_freq_time_data(dir_path)

    train_input = freq_data['train.data.mean']
    valid_input = freq_data['valid.data.mean']
    test_input = freq_data['test.veri.data.mean']

    train_grad = freq_data['train.time.mean']
    valid_grad = freq_data['valid.time.mean']
    veri_grad = freq_data['train.veri.time.mean']
    test_grad = freq_data['test.veri.time.mean']

    # Frequency axis of the features: linear over [0, 8000] Hz, or mel-spaced
    # for filterbank features (2840.02... mel corresponds to 8000 Hz).
    x = np.arange(args.feat_dim) * 8000 / (args.feat_dim - 1)
    if args.acoustic_feature == 'fbank':
        m = np.linspace(0, 2840.0230467083188, args.feat_dim)
        x = mel2hz(m)

    # ---- Figure 1: normalised gradient weight over frequency (PDF) ----
    with PdfPages(args.extract_path + '/grad.veri.time.mean.pdf') as pdf:
        plt.rc('font', family='Times New Roman')

        plt.figure(figsize=(12, 9))
        plt.xlabel('Frequency (Hz)', fontsize=24)
        plt.xticks(fontsize=22)
        plt.ylabel('Weight', fontsize=24)
        plt.yticks(fontsize=22)

        # Reference curve: reciprocal of the Hz distance between consecutive
        # integer mel values, resampled and normalised to sum to 1.
        mel_hz = 700 * (10**(np.arange(0, 2840.0230467083188) / 2595.0) - 1)
        density = 1 / np.diff(mel_hz)
        interp = interpolate.interp1d(mel_hz[1:], density)
        lo, hi = np.min(mel_hz[1:]), np.max(mel_hz[1:])
        xnew = np.arange(lo, hi, (hi - lo) / 161)
        ynew = interp(xnew)
        plt.plot(xnew, ynew / ynew.sum())

        for s in (train_grad, valid_grad, veri_grad, test_grad):
            interp = interpolate.interp1d(x, s)
            xnew = np.linspace(np.min(x), np.max(x), 161)
            ynew = interp(xnew)
            plt.plot(xnew, ynew / ynew.sum())

        # Save the (un-normalised) train gradient profile for later use.
        np.save(args.extract_path + '/train.grad.npy', train_grad)

        # One label per plotted curve, in plotting order; the reference curve
        # is drawn first and needs its own entry.
        plt.legend(['Mel-scale', 'Train', 'Valid', 'Train Verify', 'Test'],
                   loc='upper right',
                   fontsize=24)
        pdf.savefig()

    # ---- Figure 2: mean input features over frequency ----
    plt.figure(figsize=(8, 6))
    plt.title('Data distributions', fontsize=22)
    plt.xlabel('Frequency (Hz)', fontsize=16)
    plt.ylabel('Log Power (-)', fontsize=16)
    for s in (train_input, valid_input, test_input):
        interp = interpolate.interp1d(x, s)
        xnew = np.linspace(np.min(x), np.max(x), 161)
        plt.plot(xnew, interp(xnew))

    plt.legend(['Train', 'Valid', 'Test'], loc='upper right', fontsize=16)
    plt.savefig(args.extract_path + "/inputs.freq.png")
    plt.show()

    # ---- Figure 3: first cached utterance, input and |gradient| as images ----
    plt.figure(figsize=(16, 8))
    plt.title('Data distributions in Time Axis', fontsize=22)
    plt.xlabel('Time', fontsize=16)
    plt.ylabel('Magnetitude', fontsize=16)

    data = time_data[0][0]
    grad_mean = np.abs(time_data[0][1])

    ax = plt.subplot(2, 1, 1)
    im = ax.imshow(data.transpose(), cmap='viridis', aspect='auto')
    plt.colorbar(im)  # show the colour scale

    ax = plt.subplot(2, 1, 2)
    im = ax.imshow(grad_mean.transpose(), cmap='viridis', aspect='auto')
    ax.set_xlim(0, len(grad_mean))

    plt.colorbar(im)  # show the colour scale
    plt.savefig(args.extract_path + "/inputs.time.png")
    plt.show()

    print('Completed!\n')
Ejemplo n.º 6
0
def _sum_utterance_stats(bin_files, feat_dim):
    """Average statistics over ``(data, grad)`` records in *bin_files*.

    Returns an array of shape ``(3, feat_dim)``: row 0 is the mean input,
    row 1 the summed absolute gradient, row 2 the gradient variance, each
    averaged over the utterances read.
    """
    stats = np.zeros((3, feat_dim))
    num_utt = 0
    for path in bin_files:
        # NOTE: unpickling can execute arbitrary code; only use this on
        # extraction files produced by this project.
        with open(str(path), 'rb') as f:
            sets = pickle.load(f)
        for data, grad in sets:
            stats[0] += np.mean(data, axis=0)
            stats[1] += np.sum(np.abs(grad), axis=0)
            stats[2] += np.var(grad, axis=0)
            num_utt += 1

    # An empty split keeps its zeros instead of dividing by zero.
    if num_utt > 0:
        stats /= num_utt
    return stats


def _sum_pair_stats(bin_files, feat_dim):
    """Average statistics over ``(label, grad_a, grad_b, data_a, data_b)`` records.

    Returns an array of shape ``(3, 2, feat_dim)`` laid out as
    ``[statistic, utterance (a/b), feature]`` with the same three statistics
    as :func:`_sum_utterance_stats`.
    """
    stats = np.zeros((3, 2, feat_dim))
    num_utt = 0
    for path in bin_files:
        # NOTE: unpickling can execute arbitrary code; only use this on
        # extraction files produced by this project.
        with open(str(path), 'rb') as f:
            sets = pickle.load(f)
        for _label, grad_a, grad_b, data_a, data_b in sets:
            for side, (data, grad) in enumerate(((data_a, grad_a),
                                                 (data_b, grad_b))):
                stats[0][side] += np.mean(data, axis=0)
                stats[1][side] += np.sum(np.abs(grad), axis=0)
                stats[2][side] += np.var(grad, axis=0)
            num_utt += 1

    if num_utt > 0:
        stats /= num_utt
    return stats


def main():
    """Plot gradient and input statistics for the extracted data.

    Reads the module-level ``args`` (``extract_path``, ``feat_dim``,
    ``acoustic_feature``). Cached statistics are loaded from the four
    ``inputs.*.npy`` files when all of them exist; otherwise they are
    recomputed from the ``*.bin`` files and saved. Writes ``grad.veri.pdf``,
    ``grad.veri.npy`` and ``inputs.png`` into ``args.extract_path``.
    """
    dir_path = pathlib.Path(args.extract_path)
    print('Path is %s' % str(dir_path))

    train_path = args.extract_path + '/inputs.train.npy'
    valid_path = args.extract_path + '/inputs.valid.npy'
    veri_path = args.extract_path + '/inputs.veri.npy'
    test_path = args.extract_path + '/inputs.test.npy'

    # Use the cache only when every file that will be loaded is present.
    cache_paths = (train_path, valid_path, test_path, veri_path)
    if all(os.path.exists(p) for p in cache_paths):
        train_data = np.load(train_path)
        valid_data = np.load(valid_path)
        test_data = np.load(test_path)
        veri_data = np.load(veri_path)
    else:
        train_lst = list(dir_path.glob('*train*bin'))
        veri_lst = list(dir_path.glob('*ver*bin'))
        valid_lst = list(dir_path.glob('*valid*bin'))
        test_lst = list(dir_path.glob('*test*bin'))

        print('Train set extracting:')
        train_data = _sum_utterance_stats(train_lst, args.feat_dim)

        print('Valid set extracting:')
        valid_data = _sum_utterance_stats(valid_lst, args.feat_dim)

        print('Train verification set extracting:')
        veri_data = _sum_pair_stats(veri_lst, args.feat_dim)

        print('Test set extracting:')
        test_data = _sum_pair_stats(test_lst, args.feat_dim)

        print('Saving inputs in %s' % args.extract_path)
        np.save(train_path, train_data)
        np.save(valid_path, valid_data)
        np.save(veri_path, veri_data)
        np.save(test_path, test_data)

    train_set_input = train_data[0]
    valid_set_input = valid_data[0]
    test_a_set_input = test_data[0][0]
    test_b_set_input = test_data[0][1]

    train_set_grad = train_data[1]
    valid_set_grad = valid_data[1]

    # Pair splits: add the contributions of both utterances.
    veri_set_grad = veri_data[1][0] + veri_data[1][1]
    test_set_grad = test_data[1][0] + test_data[1][1]

    # Frequency axis of the features: linear over [0, 8000] Hz, or mel-spaced
    # for filterbank features (2840.02... mel corresponds to 8000 Hz).
    x = np.arange(args.feat_dim) * 8000 / (args.feat_dim - 1)
    if args.acoustic_feature == 'fbank':
        m = np.linspace(0, 2840.0230467083188, args.feat_dim)
        x = mel2hz(m)

    # ---- Figure 1: normalised gradient weight over frequency (PDF) ----
    with PdfPages(args.extract_path + '/grad.veri.pdf') as pdf:
        plt.rc('font', family='Times New Roman')

        plt.figure(figsize=(12, 9))
        plt.xlabel('Frequency (Hz)', fontsize=24)
        plt.xticks(fontsize=22)
        plt.ylabel('Weight', fontsize=24)
        plt.yticks(fontsize=22)

        # Reference curve: reciprocal of the Hz distance between consecutive
        # integer mel values, resampled and normalised to sum to 1.
        mel_hz = 700 * (10 ** (np.arange(0, 2840.0230467083188) / 2595.0) - 1)
        density = 1 / np.diff(mel_hz)
        interp = interpolate.interp1d(mel_hz[1:], density)
        lo, hi = np.min(mel_hz[1:]), np.max(mel_hz[1:])
        xnew = np.arange(lo, hi, (hi - lo) / 161)
        ynew = interp(xnew)
        plt.plot(xnew, ynew / ynew.sum())

        for s in (train_set_grad, valid_set_grad, veri_set_grad, test_set_grad):
            interp = interpolate.interp1d(x, s)
            xnew = np.linspace(np.min(x), np.max(x), 161)
            ynew = interp(xnew)
            plt.plot(xnew, ynew / ynew.sum())

        # Save the normalised train-verification gradient profile.
        np.save(args.extract_path + '/grad.veri.npy',
                veri_set_grad / veri_set_grad.sum())

        plt.legend(['Mel-scale', 'Train Set', 'Valid Set', 'train Verify Set', 'Test Set'],
                   loc='upper right', fontsize=24)
        pdf.savefig()

    # ---- Figure 2: mean input features over frequency ----
    plt.figure(figsize=(8, 6))
    plt.title('Data distributions', fontsize=22)
    plt.xlabel('Frequency (Hz)', fontsize=16)
    plt.ylabel('Log Power Energy (CMVN)', fontsize=16)
    for s in (train_set_input, valid_set_input, test_a_set_input, test_b_set_input):
        interp = interpolate.interp1d(x, s)
        xnew = np.linspace(np.min(x), np.max(x), 161)
        plt.plot(xnew, interp(xnew))

    plt.legend(['Train', 'Valid', 'Test_a', 'Test_b'], loc='upper right', fontsize=16)
    plt.savefig(args.extract_path + "/inputs.png")
    plt.show()
    print('Completed!\n')
Ejemplo n.º 7
0
def noise_suppressed_example(plot=False):
    """
    Demonstrate noise suppression with time-varying gains in an audio equalizer (EQ).

    For every frequency band the ratio of clean energy to noisy energy is used
    as the suppression gain. The ratio is computed over very short windows so
    that it can react quickly. The gains are then applied to an equalizer made
    of parallel band-pass filters, so noise is attenuated as soon as it shows up.

    The same principle is used to generate the ground-truth gains of the
    training data (y_train).

    :param plot: when true, also plot the gains and the filter frequency response.
    """
    # Change these to select the file and its noise mixing level.
    nfilt = 20
    test_num = 1  # which file
    test_noise_level = 10  # noise level in dB: 0, 10 or 20, depending on the dataset

    file_id = str(test_num)
    clean_file = "MS-SNSD/CleanSpeech_training/clnsp" + file_id + ".wav"
    noisy_file = ("MS-SNSD/NoisySpeech_training/noisy" + file_id + "_SNRdb_" +
                  str(test_noise_level) + ".0_clnsp" + file_id + ".wav")

    rate, clean_sig = wav.read(clean_file)
    rate, noisy_sig = wav.read(noisy_file)
    clean_sig = clean_sig / 32768
    noisy_sig = noisy_sig / 32768

    # Energy of each frequency band, with identical analysis settings for
    # both signals.
    band_settings = dict(winlen=0.032,
                         winstep=0.032 / 2,
                         nfilt=nfilt,
                         nfft=512,
                         lowfreq=20,
                         highfreq=8000,
                         preemph=0)
    clean_band_eng, _ = fbank(clean_sig, rate, **band_settings)
    noisy_band_eng, _ = fbank(noisy_sig, rate, **band_settings)

    # Per-frame, per-band gains.
    gains = np.sqrt(clean_band_eng / noisy_band_eng)
    if plot:
        plt.title("Gains")
        plt.plot(gains[:, :10])
        plt.show()

    # Convert the mel scale back to frequency bands.
    band_freq = mel2hz(get_mel_scale(nfilt=nfilt, lowfreq=20, highfreq=8000))
    band_frequency = band_freq[1:-1]  # the middle point of each band
    print('band frequency', band_frequency)

    # The noisy audio is passed through a set of parallel band-pass filters,
    # which together act like an audio equalizer. Unlike a normal EQ, the gain
    # of each band changes very quickly, suppressing noise while keeping speech.
    # Because the bands overlap, the summed signal is scaled down to avoid
    # overflow when converting back to int16.
    print("denoising using IIR filter")
    b, a = iir_design(band_freq, rate)
    if plot:
        plot_frequency_respond(b, a)
    print("b", b)
    print("a", a)

    step = int(0.03125 * rate / 2)
    print("audio process step:", step)

    filtered_signal = np.zeros(len(noisy_sig))
    for band, b_coeff in enumerate(b):
        filtered_signal += bandpass_filter_iir(noisy_sig, b_coeff.copy(),
                                               a[band].copy(), step,
                                               gains[:, band])
        print("filtering with frequency: ", band_frequency[band])

    filtered_signal = np.clip(filtered_signal * 0.6, -1, 1)

    filtered_path = "_filtered_sample.wav"
    noisy_path = "_noisy_sample.wav"
    wav.write(filtered_path, rate,
              np.asarray(filtered_signal * 32767, dtype=np.int16))
    wav.write(noisy_path, rate,
              np.asarray(noisy_sig * 32767, dtype=np.int16))
    print("noisy signal is saved to:", noisy_path)
    print("filtered signal is saved to:", filtered_path)
Ejemplo n.º 8
0

if __name__ == "__main__":
    # This example generates two files: the noisy speech and the
    # noise-suppressed speech. Open them in an audio player to hear the
    # difference; it gives an idea of how this energy-based noise suppression
    # works.
    noise_suppressed_example()

    # Changing this value changes the whole system, including the equalizer
    # and the RNN. It sets the number of filters in the equalizer, the number
    # of MFCC features and the number of RNN outputs.
    # Choose a value from 10 to 30.
    num_filter = 20

    # Generate the filter coefficients.
    mel_scale = get_mel_scale(nfilt=num_filter, lowfreq=20, highfreq=8000)
    band_freq = mel2hz(mel_scale)
    b, a = iir_design(
        band_freq, 16000,
        order=1)  # orders above 2 are not stable with only float32 accuracy in C.
    generate_filter_header(b,
                           a,
                           order=int(b[0].shape[-1] / 2),
                           filename='equalizer_coeff.h')
    # Plot the frequency response.
    #plot_frequency_respond(b, a)

    print('Reading noisy and clean speech files...')
    # Dataset generation starts here.
    # Energy threshold for voice activity detection in clean speech.
    vad_energy_threashold = 0.1
Ejemplo n.º 9
0
def get_filterbanks(nfilt=20,
                    nfft=512,
                    samplerate=16000,
                    lowfreq=0,
                    highfreq=None,
                    filtertype='mel',
                    multi_weight=False):
    """Compute a triangular filterbank. The filters are stored in the rows, the columns
    correspond to fft bins. The filters are returned as an array of size nfilt * (nfft/2 + 1)

    :param nfilt: the number of filters in the filterbank, default 20.
    :param nfft: the FFT size. Default is 512.
    :param samplerate: the samplerate of the signal we are working with. Affects filter spacing.
    :param lowfreq: lowest band edge of the filters, default 0 Hz
    :param highfreq: highest band edge of the filters, default samplerate/2
    :param filtertype: how the band edges are spaced: 'mel', 'amel', 'linear', or one of the
        'dnn...' variants, which place the edges at equal steps of the cumulative weight of a
        stored 161-point profile.
    :param multi_weight: when true, scale the filterbank by the normalised TIMIT variance profile.
    :returns: A numpy array of size nfilt * (nfft/2 + 1) containing filterbank. Each row holds 1 filter.
    :raises ValueError: if ``filtertype`` is not one of the supported values.
    """

    highfreq = highfreq or samplerate / 2
    assert highfreq <= samplerate / 2, "highfreq is greater than samplerate/2"

    if filtertype == 'mel':
        # compute points evenly spaced in mels
        melpoints = np.linspace(hz2mel(lowfreq), hz2mel(highfreq), nfilt + 2)
        # our points are in Hz, but we use fft bins, so we have to convert from Hz to fft bin number
        edges = np.floor((nfft + 1) * mel2hz(melpoints) / samplerate)

    elif filtertype == 'amel':
        # same construction, using the hz2amel / amel2hz pair
        melpoints = np.linspace(hz2amel(lowfreq), hz2amel(highfreq), nfilt + 2)
        edges = np.floor((nfft + 1) * amel2hz(melpoints) / samplerate)

    elif filtertype == 'linear':
        # points evenly spaced in Hz
        linearpoints = np.linspace(lowfreq, highfreq, nfilt + 2)
        edges = np.floor((nfft + 1) * linearpoints / samplerate)

    elif filtertype.startswith('dnn'):
        # The stored profiles are sampled on 161 points from 0 to samplerate / 2.
        x = np.arange(0, 161) * samplerate / 2 / 160
        if filtertype.endswith('timit.fix'):
            y = np.array(c.TIMIT_FIlTER_FIX)
        elif filtertype.endswith('timit.var'):
            y = np.array(c.TIMIT_FIlTER_VAR)
        elif filtertype.endswith('timit.mdv'):
            y = np.array(c.TIMIT_FIlTER_MDV)
        elif filtertype.endswith('libri.fix'):
            y = np.array(c.LIBRI_FILTER_FIX)
        elif filtertype.endswith('libri.var'):
            y = np.array(c.LIBRI_FILTER_VAR)
        elif filtertype.endswith('vox1.soft'):
            y = np.array(c.VOX_FILTER_SOFT)
        elif filtertype == 'dnn.vox1':
            y = np.array(c.VOX_FILTER)
        else:
            raise ValueError("unknown dnn filtertype: %r" % (filtertype,))

        # Resample the profile onto the fft bin frequencies.
        f = interpolate.interp1d(x, y)
        x_new = np.arange(nfft // 2 + 1) * samplerate / 2 / (nfft // 2)
        lowfreq_idx = np.where(x_new >= lowfreq)[0]
        highfreq_idx = np.where(x_new <= highfreq)[0]
        ynew = f(x_new)

        # Zero the weight outside [lowfreq, highfreq]; the upper cut uses the
        # last in-range bin index (highfreq itself is a scalar frequency).
        ynew[:int(lowfreq_idx[0])] = 0
        if highfreq_idx[-1] < len(x_new) - 1:
            ynew[int(highfreq_idx[-1] + 1):] = 0

        weight = ynew / np.sum(ynew)

        # Band edges: first bin, then for each filter the bin just before the
        # cumulative weight exceeds (j + 1) / (nfilt + 1), then the last bin.
        edges = [lowfreq_idx[0]]
        for j in range(nfilt):
            num_wei = 0.
            for i in range(nfft // 2 + 1):
                num_wei += weight[i]
                if num_wei > (j + 1) / (nfilt + 1):
                    edges.append(i - 1)
                    break
        edges.append(highfreq_idx[-1])

    else:
        raise ValueError("unknown filtertype: %r" % (filtertype,))

    # Triangular filters: rise from edge j to edge j + 1, fall to edge j + 2.
    fbank = np.zeros([nfilt, nfft // 2 + 1])
    for j in range(0, nfilt):
        for i in range(int(edges[j]), int(edges[j + 1])):
            fbank[j, i] = (i - edges[j]) / (edges[j + 1] - edges[j])

        for i in range(int(edges[j + 1]), int(edges[j + 2])):
            fbank[j, i] = (edges[j + 2] - i) / (edges[j + 2] - edges[j + 1])

    if multi_weight:
        # NOTE(review): broadcasting requires len(profile) == nfft // 2 + 1 - confirm callers.
        y = np.array(c.TIMIT_FIlTER_VAR)
        fbank = fbank * (y / y.max())

    return fbank
Ejemplo n.º 10
0
# Compare the two implementations (psf / csf) on the same input.
csf_ssc = csf.ssc(audio)
assert (np.shape(psf_ssc) == np.shape(csf_ssc))
error2d(psf_ssc, csf_ssc)

# The print calls below use the single-argument parenthesised form, which
# behaves the same under Python 2 and Python 3.
print('')
print('hz2mel')
print('======')
assert (get_error(psf.hz2mel(8000), csf.hz2mel(8000)) <= acceptable_error)
assert (get_error(psf.hz2mel(16000), csf.hz2mel(16000)) <= acceptable_error)
# Round trip through csf should give back the input.
assert (get_error(csf.mel2hz(csf.hz2mel(8000)), 8000) <= acceptable_error)
print(' ✓')

print('')
print('mel2hz')
print('======')
assert (get_error(psf.mel2hz(2595), csf.mel2hz(2595)) <= acceptable_error)
# Compare psf against csf; comparing csf with itself can never fail.
assert (get_error(psf.mel2hz(5190), csf.mel2hz(5190)) <= acceptable_error)
# Round trip through csf should give back the input.
assert (get_error(csf.hz2mel(csf.mel2hz(2595)), 2595) <= acceptable_error)
print(' ✓')

print('')
print('get_filterbanks')
print('===============')
psf_filterbanks = psf.get_filterbanks()
csf_filterbanks = csf.get_filterbanks()
assert (np.shape(psf_filterbanks) == np.shape(csf_filterbanks))
error2d(psf_filterbanks, csf_filterbanks)

print('')
print('lifter')
print('======')
Ejemplo n.º 11
0
def getmelpoint(_n_filt=N_FILT):
    """Return ``_n_filt`` band points in Hz between 0 and ``SAMPLING_RATE / 2``.

    ``_n_filt + 1`` points are spaced evenly between ``hz2mel(0)`` and
    ``hz2mel(SAMPLING_RATE / 2)``, mapped back through ``mel2hz``, and the
    first point is dropped.
    """
    nyquist = SAMPLING_RATE / 2
    mel_grid = np.linspace(hz2mel(0), hz2mel(nyquist), _n_filt + 1)
    hz_grid = mel2hz(mel_grid)
    return hz_grid[1:_n_filt + 1]
Ejemplo n.º 12
0
        self.num_filter = num_filter
        self.sr = sr
<<<<<<< HEAD

=======
        self.exp = exp
        self.filter_fix = filter_fix

        requires_grad = not filter_fix
>>>>>>> Server/Server
        input_freq = np.linspace(0, self.sr / 2, input_dim)
        self.input_freq = nn.Parameter(torch.from_numpy(input_freq).expand(num_filter, input_dim).float(),
                                       requires_grad=False)

        centers = np.linspace(0, hz2mel(sr / 2), num_filter + 2)
        centers = mel2hz(centers)
<<<<<<< HEAD
        self.frequency_center = nn.Parameter(torch.from_numpy(centers[1:-1]).float().reshape(num_filter, 1))
=======
        self.frequency_center = nn.Parameter(torch.from_numpy(centers[1:-1]).float().reshape(num_filter, 1),
                                             requires_grad=requires_grad)
>>>>>>> Server/Server

        bandwidth = []
        for i in range(2, len(centers)):
            bandwidth.append(centers[i] - centers[i - 1])
<<<<<<< HEAD
        self.bandwidth = nn.Parameter(torch.tensor(bandwidth).reshape(num_filter, 1).float())
        self.gain = nn.Parameter(torch.ones(num_filter, dtype=torch.float32).reshape(num_filter, 1))

    def forward(self, input):