Example #1
0
 def extract(self, file_name):
     """Compute MFCC features and their derivative cube for one WAV file.

     Args:
         file_name: Path of the WAV file to read.

     Returns:
         A ``(mfcc, mfcc_feature_cube)`` tuple: the output of
         ``speechpy.mfcc`` for the first channel of the recording, and the
         result of passing that output to
         ``speechpy.extract_derivative_feature``.
     """
     fs, signal = wav.read(file_name)

     # Keep only the first channel of a multi-channel recording. A mono
     # recording is already one-dimensional (assuming `wav` is
     # scipy.io.wavfile), and indexing a second axis on it would raise
     # IndexError, so it is passed through unchanged.
     if signal.ndim > 1:
         signal = signal[:, 0]

     mfcc = speechpy.mfcc(
         signal,
         sampling_frequency=fs,
         frame_length=0.020,
         frame_stride=0.01,
         num_filters=40,
         fft_length=512,
         low_frequency=0,
         high_frequency=None)
     mfcc_feature_cube = speechpy.extract_derivative_feature(mfcc)
     return mfcc, mfcc_feature_cube
Example #2
0
# Make the parent of the current working directory importable.
# NOTE(review): `os` and `sys` are used here before any import that is
# visible in this excerpt (`import os` only appears a few lines below) —
# confirm they are imported earlier in the file.
lib_path = os.path.abspath(os.path.join('..'))
print(lib_path)
sys.path.append(lib_path)
# Presumably meant to be resolvable through the path entry appended above
# when speechpy is used from a source checkout — TODO confirm.
import speechpy
import os

# The sample recording is looked up in the directory containing this script.
file_name = os.path.join(os.path.dirname(os.path.abspath(__file__)),
                         'Alesis-Sanctuary-QCard-AcoustcBas-C2.wav')
# NOTE(review): `wav` is not imported in this excerpt; presumably
# scipy.io.wavfile — confirm.
fs, signal = wav.read(file_name)
# Keep only the first column (first channel). This indexing needs a 2-D
# array and would fail on a one-dimensional signal.
signal = signal[:, 0]

############# Extract MFCC features #############
# frame_length / frame_stride are presumably given in seconds — confirm
# against the speechpy documentation.
mfcc = speechpy.mfcc(signal,
                     sampling_frequency=fs,
                     frame_length=0.020,
                     frame_stride=0.01,
                     num_filters=40,
                     fft_length=512,
                     low_frequency=0,
                     high_frequency=None)
# Normalized copy of the MFCCs; it is only used for the shape print below.
mfcc_cmvn = speechpy.cmvnw(mfcc, win_size=301, variance_normalization=True)
print('mfcc(mean + variance normalized) feature shape=', mfcc_cmvn.shape)

# Note: the feature cube is derived from the raw `mfcc`, not from `mfcc_cmvn`.
mfcc_feature_cube = speechpy.extract_derivative_feature(mfcc)
print('mfcc feature cube shape=', mfcc_feature_cube.shape)

############# Extract logenergy features #############
logenergy = speechpy.lmfe(signal,
                          sampling_frequency=fs,
                          frame_length=0.020,
                          frame_stride=0.01,
                          num_filters=40,
Example #3
0
import time

from speechpy import mfcc
from speechpy import delta
from speechpy import log_filter_bank
import scipy.io.wavfile as wav

from scipy import signal as sig
import matplotlib.pyplot as plt

from random import shuffle

if __name__ == '__main__':
    # Load the sample recording and derive the feature sets from it.
    filename = './sample_24414.wav'
    rate, signal = wav.read(filename)
    mfcc_feature = mfcc(signal, rate)
    d_mfcc_feature = delta(mfcc_feature, 2)
    filter_bank_feature = log_filter_bank(signal, rate)
    # Only rows 1 and 2 of the log filter-bank output are kept.
    feature = filter_bank_feature[1:3, :]

    # Dump the inputs and every derived feature together with its shape.
    for label, value in (
            ('signal:', signal),
            ('rate:', rate),
            ('np.shape(signal):', np.shape(signal)),
            ('mfcc_feature:', mfcc_feature),
            ('np.shape(mfcc_feature):', np.shape(mfcc_feature)),
            ('d_mfcc_feature:', d_mfcc_feature),
            ('np.shape(d_mfcc_feature):', np.shape(d_mfcc_feature)),
            ('feature:', feature),
            ('np.shape(feature):', np.shape(feature)),
    ):
        print(label, value)
Example #4
0
def get_mfcc(filename,
             downsample=0,
             delta=False,
             noisereduction=True,
             normalizemean=False,
             numcoeff=13,
             verbose=False):
    """Return the MFCC of a given WAV file as a numpy array.

    Args:
        filename:       path of the WAV file to analyse
        downsample:     when > 0, ``resample(filename, downsample)`` is called
                        first and the MFCC is computed from "resampled.wav"
        delta:          append delta (velocity) features to MFCC; doubles the
                        number of features per frame
        noisereduction: apply noise reduction before computing MFCC
        normalizemean:  output MFCC with the mean normalized; mutually
                        exclusive with ``delta`` (``delta`` wins when both
                        are set)
        numcoeff:       number of cepstral coefficients; usually scaled
                        linearly with the sampling rate
        verbose:        enable detailed print statements

    Returns:
        The MFCC matrix from ``speechpy.mfcc``. With ``delta`` set, each row
        is followed by the matching row of ``get_delta(mfcc)``. Otherwise,
        with ``normalizemean`` set, the result of
        ``speechpy.cmvn(mfcc, variance_normalization=False)``.
    """

    def report(message):
        # Diagnostics are printed only when the caller asked for them.
        if verbose:
            print(message)

    def report_cube_shape(label, features):
        # The derivative feature cube is needed only for this diagnostic, so
        # it is not computed at all unless it is going to be printed.
        if verbose:
            cube = speechpy.extract_derivative_feature(features)
            print(label + str(cube.shape))

    # Perform downsampling and create another downsampled wav file if specified
    if downsample > 0:
        report('Downsampling On by factor of ' + str(downsample))
        resample(filename, downsample)
        filename = "resampled.wav"
    else:
        report('Downsampling Off')

    # Perform noise reduction before calculating coefficients
    if noisereduction:
        report('Noise Reduction On')
        fs, signal = reduce_noise(filename)
    else:
        report('Noise Reduction Off')
        fs, signal = wav.read(filename)

    # NOTE(review): `div` is not defined in this block; presumably a
    # module-level divider string — confirm.
    report('\nMFCC (Mel Frequency Cepstral Coefficients)\n' + div)
    report('File sampling frequency: ' + str(fs) + '\n')

    mfcc = speechpy.mfcc(
        signal,
        sampling_frequency=fs,
        frame_length=0.020,
        frame_stride=0.020,  # Frame overlap amount (0.02 is no overlap)
        num_cepstral=numcoeff,  # Default 13; scale with sample rate
        num_filters=40,
        fft_length=512,
        low_frequency=0,
        high_frequency=None,
        dc_elimination=True)

    report_cube_shape('MFCC dimension: ', mfcc)
    report(mfcc)

    if delta:
        delta_features = get_delta(mfcc)
        length, numfeatures = mfcc.shape

        # Left half of every row holds the MFCCs, right half their deltas.
        mfcc_delta = np.empty((length, numfeatures * 2))
        mfcc_delta[:, :numfeatures] = mfcc
        mfcc_delta[:, numfeatures:] = delta_features

        report_cube_shape('\nMFCC with Delta dimension: ', mfcc_delta)
        report(mfcc_delta)

        return mfcc_delta

    # Option to return normalized cepstral mean (for each set of coeffs,
    # subtracts mean from each coeff)
    if normalizemean:
        mfcc_normalizedmean = speechpy.cmvn(mfcc, variance_normalization=False)

        report_cube_shape('\nMFCC with normalized mean dimension: ',
                          mfcc_normalizedmean)
        report(mfcc_normalizedmean)

        return mfcc_normalizedmean

    return mfcc