def DPGMM_test(cov_type, alpha_val):
    """Evaluate closed-set speaker identification with per-speaker DPGMMs.

    Every ``*00.wav`` file under ``./speakers`` marks one speaker.  The
    speaker's recordings 0-8 are concatenated, MFCC features are extracted
    (first coefficient dropped) and a ``mixture.DPGMM`` is fitted to them.
    Each ``*09.wav`` recording is then scored against that model; the speaker
    counts as correctly identified when the best-scoring recording carries
    the same speaker id.  The resulting accuracy is printed.

    The speaker id is taken from the four characters that precede the
    two-digit recording number in the file name.

    Args:
        cov_type: Value passed as ``covariance_type`` to ``mixture.DPGMM``.
        alpha_val: Value passed as ``alpha`` to ``mixture.DPGMM``.
    """
    # os.path.join keeps the pattern valid on non-Windows systems as well.
    files = glob.glob(os.path.join(os.getcwd(), 'speakers', '*.wav'))
    gauss_num = 32
    good = 0
    total = 0

    # The held-out recordings do not depend on the model being evaluated, so
    # read them and extract their features once instead of once per speaker.
    test_features = []
    for path in files:
        if path[-6:-4] == '09':
            test_signal = wav.read(path)[1]
            test_features.append((path[-10:-6], MFCC.extract(test_signal)[:, 1:]))

    for path in files:
        if path[-6:-4] != '00':
            continue
        current_speaker = path[-10:-6]

        # Recordings 0-8 form the training material; 9 is held out for testing.
        recordings = [wav.read(path[:-5] + str(i) + path[-4:])[1] for i in range(9)]
        # Flattened float64 signal, the same result the former repeated
        # np.append onto an empty array produced, built in a single pass.
        merged_files = np.concatenate([np.ravel(r) for r in recordings]).astype(float)

        speaker_MFCC = MFCC.extract(merged_files)[:, 1:]
        g = mixture.DPGMM(n_components=gauss_num, n_iter=100,
                          covariance_type=cov_type, alpha=alpha_val)
        g.fit(speaker_MFCC)

        # -10000 acts as the floor below which no recording is accepted.
        log_prob = -10000
        winner = 'nobody'
        for test_speaker, features in test_features:
            temp_prob = np.mean(g.score(features))
            if temp_prob > log_prob:
                log_prob = temp_prob
                winner = test_speaker

        if winner == current_speaker:
            good += 1
        total += 1

    # Without any speaker there is no meaningful ratio; report NaN rather
    # than failing with ZeroDivisionError.
    efficiency = good / total if total else float('nan')
    print("DPGMM (covariance_type - ", cov_type, ", alpha - ", str(alpha_val), "), Efficiency = ", str(efficiency))
Example #2
0
def mix_feature(tup):
    """Return the MFCC and LPC features of ``tup`` joined column-wise.

    ``tup`` is handed unchanged to ``MFCC.extract`` and ``LPC.extract``.  The
    diagnostic below indexes ``tup[1]``, so it is presumably a
    ``(sample_rate, signal)`` pair -- TODO confirm against the callers.

    An empty MFCC result is reported on stderr but does not stop processing;
    the concatenation is still attempted, as before.

    Returns:
        The result of ``np.concatenate((mfcc, lpc), axis=1)``.
    """
    mfcc = MFCC.extract(tup)
    lpc = LPC.extract(tup)
    if len(mfcc) == 0:
        # sys.stderr.write emits the same text as the former
        # ``print >> sys.stderr`` and also works on Python 3, where the
        # chevron form raises TypeError.
        sys.stderr.write(
            "ERROR.. failed to extract mfcc feature: %s\n" % len(tup[1]))
    return np.concatenate((mfcc, lpc), axis=1)
Example #3
0
def select_events(nevents,nfeatures):
    """Pick random audio events, describe them by MFCCs and cluster them.

    Reads the module-level names ``faudio`` (the audio signal), ``grain_mid``
    and ``tracks`` and rebinds the module-level ``groups``.

    Args:
        nevents: Number of random event start positions to draw.
        nfeatures: Number of random spectral feature bins to draw.  The bins
            are not used by the current feature computation.

    Returns:
        ``[events, groups]``: the drawn start positions and the second
        element of the ``kmeans`` result for those events.
    """
    global groups
    fftbins = 8192
    featurewidth = 16
    print("Selecting %d random spectral features.." % nfeatures)
    # The bins themselves are unused, but the draw is kept so the global
    # NumPy random stream (and thus the events chosen below for a given seed)
    # stays the same.  Floor division keeps the bounds integral on Python 3.
    np.random.randint(featurewidth // 2, fftbins // 8, nfeatures)
    print("Selecting %d random audio events.." % nevents)
    events = np.random.randint(0, len(faudio) - grain_mid, nevents)
    # Row 0 holds the event index; the remaining 13 rows hold its MFCC values.
    features = np.zeros((14, nevents))
    features[0] = np.arange(0, nevents)
    print("Computing audio event spectrograms..")
    # The window does not depend on the event, so build it once.
    window = sig.hann(1000)
    for i in range(0, nevents):
        # NOTE(review): a slice shorter than 1000 samples (an event within
        # 1000 samples of the end of faudio) cannot be multiplied by the
        # window -- presumably grain_mid >= 1000 prevents that; confirm.
        segment = faudio[events[i]:min(events[i] + 1000, len(faudio))] * window
        mfcc = MFCC.extract(segment)
        features[:, i] = np.append(i, mfcc)
    print("Clustering events with K-Means algorithm..")
    groups = kmeans(np.transpose(features), tracks, minit='points', iter=30)[1]
    return [events, groups]
Example #4
0
 def Classify (self, sample, verbose = True):
   """Return the stored gesture closest to ``sample``, or None.

   ``sample`` is a buffer of 16-bit integers; its MFCC frames are compared
   frame by frame (city-block distance) with every template in
   ``self.params[gesture]``.  A gesture's score is the smallest averaged
   distance over its templates.

   Args:
     sample: Buffer readable by ``numpy.frombuffer(..., numpy.int16)``.
     verbose: When true, print every gesture score and the final winner.

   Returns:
     The gesture with the lowest score if that score is below ``THRESHOLD``;
     otherwise None.  None is also returned when no template could be
     scored at all.
   """
   length = len(sample)
   features = MFCC.extract(numpy.frombuffer(sample, numpy.int16))
   minimum = None
   lowest = None
   for gesture in self.params:
     distances = []
     for tsample in self.params[gesture]:
       # NOTE(review): templates whose length equals the raw sample length
       # are skipped, exactly as before; this compares a byte count with a
       # frame count and looks unintended -- confirm the real condition.
       if length == len(tsample):
         continue

       frames = min(len(features), len(tsample))
       if frames == 0:
         # Nothing to compare; previously this divided by a stale (or
         # undefined) loop index.
         continue

       total_distance = 0
       for i in range(frames):
         total_distance += dist.cityblock(features[i], tsample[i])

       # NOTE(review): the divisor stays at the last frame index
       # (frames - 1) so scores remain comparable with the tuned THRESHOLD;
       # a single-frame comparison divides by 1 instead of raising
       # ZeroDivisionError.
       distances.append(total_distance / float(max(frames - 1, 1)))

     if not distances:
       # Every template was skipped: numpy.min([]) would raise ValueError.
       continue

     score = numpy.min(distances)
     if verbose:
       print("Gesture %s: %f" % (gesture, score))
     if minimum is None or score < minimum:
       minimum = score
       lowest = gesture

   if lowest is None:
     return None
   if verbose:
     print("%s %s" % (lowest, minimum))
   if minimum < THRESHOLD:
     return lowest
   return None
def test(filename, verbose = False):
    """Run the HMM test on the MFCC features of the WAV file ``filename``.

    The file is loaded with ``loadWAVfile``, its features are extracted with
    plotting disabled (``show=False``) and passed, together with ``verbose``,
    to ``HMM_Model.test``.  Nothing is returned.
    """
    features = MFCC.extract(loadWAVfile(filename), show=False)

    # Evaluate the features against the HMM.
    HMM_Model.test(features, verbose)
Example #6
0
def GenerateParams (gestures, verbose = True):
  """Compute the MFCC features of every recorded sample of every gesture.

  Args:
    gestures: Mapping from gesture name to an iterable of raw sample
      buffers, each readable as 16-bit integers.
    verbose: When true, print one progress line per gesture.

  Returns:
    A dict mapping each gesture name to the list of ``MFCC.extract``
    results for its samples, in the original order.
  """
  params = {}
  for gesture in gestures:
    if verbose:
      # Single-argument print(...) behaves the same on Python 2 and 3.
      print("Processing " + gesture)
    params[gesture] = [
      MFCC.extract(numpy.frombuffer(sample, numpy.int16))
      for sample in gestures[gesture]
    ]
  return params
Example #7
0
def train(filename, id):
    """Train the models for ``id`` from the WAV file ``filename``.

    The file is loaded with ``loadWAVfile`` and its MFCC features are
    extracted with plotting disabled.  The features train a ``VQ.Model``
    created for ``id`` and are then passed to ``create_file`` (the HMM
    training step, according to the original comment).  Nothing is returned.
    """
    features = MFCC.extract(loadWAVfile(filename), show=False)

    # Vector-quantiser training.
    vq_model = VQ.Model(id)
    vq_model.train(features)

    # HMM training.
    create_file(features, id)
Example #8
0
def train(filename, id):
    """Train the models for ``id`` from the WAV file ``filename``.

    The file is loaded with ``loadWAVfile`` and its MFCC features are
    extracted with plotting disabled.  The features train a ``VQ.Model``
    created for ``id`` and are then passed to ``create_file`` (the HMM
    training step, according to the original comment).  Nothing is returned.
    """
    features = MFCC.extract(loadWAVfile(filename), show=False)

    # Vector-quantiser training.
    vq_model = VQ.Model(id)
    vq_model.train(features)

    # HMM training.
    create_file(features, id)
Example #9
0
def load():
    """Load the MFCC samples of every subject from the ``Samples`` folder.

    For each subject name, every file matching ``Samples/<name> *`` is read
    with ``wread`` and the first 30 rows of its MFCC features are kept.

    Returns:
        ``(names, sampledict)`` where ``names`` is the list of subject names
        and ``sampledict`` maps each name to its list of feature arrays
        (empty when no file matches).
    """
    names = [
        "Mathematics", "Biology", "PoliticalScience", "Statistics",
        "Psychology"
    ]
    sampledict = {
        subject: [
            MFCC.extract(wread(path)[1])[:30]
            for path in glob.glob("Samples/" + subject + " *")
        ]
        for subject in names
    }
    return names, sampledict
Example #10
0
 def produce_mfcc(self, filename):
     """Return ``(mfcc, label)`` for the wave file ``filename``.

     Up to ``self.sz`` frames are read from the file, interpreted as 16-bit
     integers and passed to ``MFCC.extract``.  The label is group 1 of
     ``self.lab_extractor.match(filename)`` (presumably a compiled regular
     expression -- confirm); when the name does not match, the label is
     ``"unknown"`` and a message is written to stderr.
     """
     wav = wave.open(filename, "r")
     try:
         raw = wav.readframes(self.sz)
     finally:
         # Release the file handle even if reading fails.
         wav.close()
     # frombuffer replaces the deprecated binary mode of np.fromstring; the
     # copy keeps the array writable, as the fromstring result was.
     x = np.frombuffer(raw, dtype=np.int16).copy()
     mfcc = MFCC.extract(x)
     match = self.lab_extractor.match(filename)
     try:
         label = match.group(1)
     except (AttributeError, IndexError):
         # AttributeError: no match (match is None); IndexError: the pattern
         # defines no group 1.
         label = "unknown"
         sys.stderr.write("unknown labels encountered\n")
     return (mfcc, label)
def add_to_database(url_, person_name_):
    """Record a speaker from a radio stream and store a GMM for that speaker.

    The audio returned by ``read_radio_stream(url_)`` is written to
    ``People/<person_name_>.wav``, its MFCC features (first coefficient
    dropped) are used to fit a 128-component ``mixture.GMM``, and the model
    is stored under ``person_name_`` in the saved model dictionary.

    Args:
        url_: Address of the stream handed to ``read_radio_stream``.
        person_name_: Key of the new model and base name of the WAV file.
    """
    gmm_models = {}

    # NOTE(review): existing models are read from 'mfcc.mat' but the result
    # is written to 'mfcc_32.mat' -- confirm which file is intended.
    if os.path.isfile('mfcc.mat'):
        gmm_models = sio.loadmat('mfcc.mat')
    print("Recording and processing...\n\n")
    full_sound_model = read_radio_stream(url_)

    # os.path.join instead of a hard-coded backslash keeps the path valid on
    # non-Windows systems; on Windows the resulting path is unchanged.
    wav.write(os.path.join('People', person_name_ + '.wav'), 11025,
              full_sound_model / 32767.0)

    print("Calculating MFCC and saving the model...")
    mfcc_features = MFCC.extract(full_sound_model)[:, 1:]

    g = mixture.GMM(n_components=128)
    g.fit(mfcc_features)
    # The weights are repeated across the feature dimension so that means,
    # covariances and weights share one shape and fit into a single array.
    # The dimension is taken from the fitted model instead of assuming 12.
    n_dims = g.means_.shape[1]
    model = np.array([g.means_, g.covars_,
                      np.repeat(g.weights_[:, np.newaxis], n_dims, 1)])

    print(len(g.means_))

    gmm_models[person_name_] = model
    sio.savemat('mfcc_32.mat', gmm_models, oned_as='row')
def read_radio_stream(url_):
    """Play an MP3 stream and keep printing the best-matching stored speaker.

    The speaker models are loaded from 'mfcc_16_fft256_GMM.mat'.  The stream
    at ``url_`` is then read in 15000-byte chunks; each chunk is parsed by
    the demuxer, turned into a NumPy array by ``ansic_to_numpy``, queued for
    playback on a pygame channel, decimated by 4 and converted to MFCC
    features (first coefficient dropped).  The features are scored against
    every stored model and the key of the best-scoring model is printed
    together with its mean score.

    NOTE(review): the ``while True`` loop contains no ``break`` or
    ``return``, so as written the function ends only through an exception,
    the final print is unreachable and no value is ever returned.  This is
    Python 2 code (print statements, ``urllib.urlopen``, ``dict.iteritems``).
    """

    database = sio.loadmat('mfcc_16_fft256_GMM.mat')
    # Remove the bookkeeping entries so that only speaker models are iterated.
    database.pop('__header__')
    database.pop('__version__')
    database.pop('__globals__')

    r2 = urllib.urlopen(url_)
    pygame.mixer.init(44100, -16, 2, 2048)
    print pygame.mixer.get_init()
    chan1 = pygame.mixer.find_channel()

    # NOTE(review): 'format' is only referenced by the disabled output below.
    format = sound.AFMT_S16_LE
    print sound.getODevices()
    #snd_out = sound.Output(44100, 2, format)

    dm = muxer.Demuxer('mp3')
    dec = None   # decoder, created lazily once the demuxer knows the stream
    snd = None   # NOTE(review): never used afterwards

    print(r2.info())
    print('###################\n')

    #f = open('radio.mp3', 'wb')
    #g = open('radio.wav', 'wb')
    i = 0   # NOTE(review): leftover of the disabled 'i < 3' loop limit
    while True:  #i < 3:

        samples = r2.read(15000)

        frames = dm.parse(samples)

        if dec is None:
            # Open the decoder for the first stream reported by the demuxer
            dec = acodec.Decoder(dm.streams[0])
        

        sound_np_array = ansic_to_numpy(frames, dec)
        
        # Duplicate the samples into two identical int16 columns before
        # handing them to pygame for playback.
        to_play = np.array(np.repeat(sound_np_array[:, np.newaxis], 2, 1), dtype = 'int16')
        sounds = pygame.sndarray.make_sound(to_play)
        chan1.queue(sounds)

        # Downsample by a factor of 4 before feature extraction.
        sound_np_array = decimate(sound_np_array, 4)

        mfcc_features = MFCC.extract(sound_np_array) # about 1.5 s per chunk, per the original author's note
        mfcc_features = mfcc_features[:, 1:]


        # One GMM object is reused for all stored models: its parameters are
        # overwritten with each model's values before scoring.
        g = mixture.GMM(n_components=16)
        # -10000 is the floor a model has to beat to be reported.
        log_prob = -10000
        winner = 'nobody'

        for key, values in database.iteritems():
            try:
                # Rows 0, 1 and 2 of a stored model hold the means, the
                # covariances and the weights; the weights are read from a
                # single column (presumably they were saved repeated across
                # columns -- confirm against the code that writes the file).
                g.means_ = values[0, :, :]
                g.covars_ = values[1, :, :]
                g.weights_ = values[2, :, 1]
                
                temp_prob = np.mean(g.score(mfcc_features))
                
                if temp_prob > log_prob:
                    log_prob = temp_prob
                    winner = key
            except TypeError:
                # Entries that cannot be indexed or scored as a model are
                # reported and skipped ('dla' is Polish for 'for').
                print 'error dla ', key

        print winner, log_prob

    # NOTE(review): unreachable, the loop above never terminates normally.
    print('\n###################')
Example #13
0
def get_mfcc_worker(fpath):
    """Return at most the first 1500 rows of the MFCC features of ``fpath``.

    The path is announced on stdout, the file is read with ``wavfile.read``
    and both values it returns are passed, in that order, to
    ``MFCC.extract``.
    """
    print('mfcc: ' + fpath)
    sample_rate, samples = wavfile.read(fpath)
    return MFCC.extract(sample_rate, samples)[:1500]
Example #14
0
def collect(n=20):
    """Record ``n`` clips with ``arecord`` and return their MFCC features.

    Each iteration records into ``test.wav`` (overwriting the previous clip)
    and appends the MFCC features of the recorded samples to the result.

    NOTE(review): the exit status of ``arecord`` is not checked; if a
    recording fails while an older ``test.wav`` exists, that older clip is
    read again.

    Args:
        n: Number of clips to record; values below 1 yield an empty list.

    Returns:
        A list with one ``MFCC.extract`` result per recorded clip.
    """
    obs = []
    # range instead of the Python-2-only xrange; n is small, so the list
    # Python 2 builds here is irrelevant.
    for _ in range(n):
        os.system("arecord -f S16_LE --rate=44100 -D hw:1,0 -d 3 test.wav")
        obs.append(MFCC.extract(wavfile.read("test.wav")[1]))
    return obs
def GMM_test(ii):
    """Evaluate closed-set speaker identification with per-speaker GMMs.

    Speaker models are loaded from ``mfcc_32.mat`` when that file exists.
    Otherwise one ``mixture.GMM`` is trained per speaker from recordings 0-8
    (located through the speaker's ``*00.wav`` file under ``./speakers``)
    and the models are saved to that file.  Every ``*09.wav`` recording is
    then scored against all models and counted as correct when the
    best-scoring model belongs to the same speaker.  Progress and the final
    accuracy are printed.

    Args:
        ii: Value passed as ``n_iter`` to ``mixture.GMM``.
    """
    # os.path.join keeps the pattern valid on non-Windows systems as well.
    files = glob.glob(os.path.join(os.getcwd(), 'speakers', '*.wav'))
    gauss_num = 32
    num_iter = ii
    model_file = 'mfcc_' + str(gauss_num) + '.mat'
    speaker_GMM_dict = {}

    if os.path.isfile(model_file):
        speaker_GMM_dict = sio.loadmat(model_file)
        # Drop the bookkeeping entries so only speaker models remain.
        for meta_key in ('__header__', '__version__', '__globals__'):
            speaker_GMM_dict.pop(meta_key, None)
    else:
        iterator = 1
        for path in files:
            if path[-6:-4] != '00':
                continue
            current_speaker = path[-10:-6]
            print("############# Calculate MFCC and GMM for ", current_speaker, " , speaker no ", str(iterator))
            iterator += 1

            # Recordings 0-8 form the training material; 9 is held out.
            recordings = [wav.read(path[:-5] + str(i) + path[-4:])[1] for i in range(9)]
            # Flattened float64 signal, the same result the former repeated
            # np.append onto an empty array produced, built in a single pass.
            merged_files = np.concatenate([np.ravel(r) for r in recordings]).astype(float)
            speaker_MFCC = MFCC.extract(merged_files)[:, 1:]

            g = mixture.GMM(n_components=gauss_num, n_iter=num_iter)
            g.fit(speaker_MFCC)

            # Weights are repeated across the feature dimension so that the
            # three parameter arrays share one shape; the dimension comes
            # from the fitted model instead of a hard-coded 12.
            n_dims = g.means_.shape[1]
            speaker_GMM_dict[current_speaker] = np.array(
                [g.means_, g.covars_, np.repeat(g.weights_[:, np.newaxis], n_dims, 1)])

        # Do not cache an empty database: it would be loaded on the next run
        # and training would then be skipped for good.
        if speaker_GMM_dict:
            sio.savemat(model_file, speaker_GMM_dict, oned_as='row')

    iterator = 1
    good = 0
    bad = 0
    total = 0

    for path in files:
        if path[-6:-4] != '09':
            continue
        # One GMM object per test file; its parameters are overwritten with
        # each stored model before scoring.
        g = mixture.GMM(n_components=gauss_num, n_iter=num_iter)
        current_file = wav.read(path)
        current_speaker = path[-10:-6]
        speaker_MFCC = MFCC.extract(current_file[1])[:, 1:]
        # -10000 acts as the floor below which no model is accepted.
        log_prob = -10000
        winner = 'nobody'
        for key, values in speaker_GMM_dict.items():
            try:
                g.means_ = values[0, :, :]
                g.covars_ = values[1, :, :]
                g.weights_ = values[2, :, 1]
                temp_prob = np.mean(g.score(speaker_MFCC))
                if temp_prob > log_prob:
                    log_prob = temp_prob
                    winner = key
            except TypeError:
                print('error for ', key)
        if current_speaker == winner:
            good += 1
        else:
            bad += 1
        total += 1
        print(current_speaker, " speaker no ", str(iterator), " is similar to ", winner, " - log prob = ", str(log_prob))
        print("good = ", str(good), ", bad = ", str(bad), ", total = ", str(total))
        iterator += 1

    # Without any test recording there is no meaningful ratio; report NaN
    # rather than failing with ZeroDivisionError.
    efficiency = good / total if total else float('nan')
    print("GMM, n_iter = ", num_iter, ", Efficiency = ", str(efficiency))
Example #16
0
def get_mfcc_worker(fpath):
    """Return at most the first 1500 rows of the MFCC features of ``fpath``.

    The path is announced on stdout, the file is read with ``wavfile.read``
    and both values it returns are passed, in that order, to
    ``MFCC.extract``.
    """
    print('mfcc: ' + fpath)
    sample_rate, samples = wavfile.read(fpath)
    return MFCC.extract(sample_rate, samples)[:1500]
	def CalculateMFCCs(self):
		"""Return the MFCC features extracted from ``self.wav_data``."""
		return MFCC.extract(self.wav_data)
Example #18
0
# Interactive demo: read a sound file with audiolab and compute its MFCC.
# Run with:  python -i <name of this .py file>
import numpy as np
from scikits.audiolab import Sndfile

SOUND_DIRECTORY = 'small_data_sample/right_whale'

test_file = '%s/train12.aiff' % SOUND_DIRECTORY

# The file is deliberately left open: the script is meant to be run with
# ``-i`` so that further frames can be read from ``f`` at the prompt.
f = Sndfile(test_file, 'r')

# Sndfile instances can be queried for the audio file meta-data
fs = f.samplerate
nc = f.channels
enc = f.encoding

# Reading is straightforward
data = f.read_frames(1000)

# This reads the next 1000 frames, e.g. from 1000 to 2000, but as single precision
data_float = f.read_frames(1000, dtype=np.float32)
# Single-argument print(...) gives the same output on Python 2 and 3.
print(data_float.shape)

import MFCC
# data_float is a wave signal saved in a 1-D numpy array
# mfcc is a 2-D numpy array, where each row is the
# MFCC of a frame in data_float
mfcc = MFCC.extract(data_float, show = True)
# This will also plot the MFCC and the spectrogram
# reconstructed from MFCC by inverse DCT

def collect(n=20):
    """Record ``n`` clips with ``arecord`` and return their MFCC features.

    Each iteration records into ``test.wav`` (overwriting the previous clip)
    and appends the MFCC features of the recorded samples to the result.

    NOTE(review): the exit status of ``arecord`` is not checked; if a
    recording fails while an older ``test.wav`` exists, that older clip is
    read again.

    Args:
        n: Number of clips to record; values below 1 yield an empty list.

    Returns:
        A list with one ``MFCC.extract`` result per recorded clip.
    """
    obs = []
    # range instead of the Python-2-only xrange; n is small, so the list
    # Python 2 builds here is irrelevant.
    for _ in range(n):
        os.system("arecord -f S16_LE --rate=44100 -D hw:1,0 -d 3 test.wav")
        obs.append(MFCC.extract(wavfile.read("test.wav")[1]))
    return obs