Example No. 1
def mel_dist(x1, x2, fs, num, wlen, inc):
	"""
	Compute the MFCC coefficients of the two signals x1, x2 and the per-frame distance between them
	:param x1: signal 1
	:param x2: signal 2
	:param fs: sample frequency
	:param num: number of MFCC coefficients to keep
	:param wlen: frame length
	:param inc: frame shift
	:return Dcep: per-frame cepstral distance between x1 and x2
	:return Ccep1, Ccep2: the first num MFCC coefficients of each signal
	"""
	M = MFCC()
	ccc1 = M.mfcc(x1, fs, num, wlen, inc)		# MFCC
	ccc2 = M.mfcc(x2, fs, num, wlen, inc)
	fn1 = np.shape(ccc1)[0]		# frame number
	Ccep1 = ccc1[:, 0 : num]
	Ccep2 = ccc2[:, 0 : num]

	Dcep = np.zeros(fn1)	# distance
	for i in range(fn1):
		Cn1 = Ccep1[i, :]
		Cn2 = Ccep2[i, :]
		Dstu = 0
		for k in range(num):
			Dstu = Dstu + (Cn1[k] - Cn2[k]) ** 2

		Dcep[i] = np.sqrt(Dstu)


	return Dcep, Ccep1, Ccep2
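
The two nested loops above compute a per-frame Euclidean distance between the cepstral vectors; with NumPy already imported, the same result can be obtained in one vectorized line:

Dcep = np.linalg.norm(Ccep1 - Ccep2, axis=1)
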
def DPGMM_test(cov_type, alpha_val):
    #speakers_MFCC_dict = {}
    #speaker_GMM_dict = {}
    files = glob.glob(os.getcwd()+'\\speakers\\*.wav')
    gauss_num = 32
    iterator = 1
    test_files = []
    good = 0
    bad = 0
    total = 0

    for file in files:
        if file[-6:-4] == '09':
            test_files.append(file)

    for file in files:
        #print(file)
        if file[-6:-4] == '00':   #file[len(file)-12:len(file)-9]
            current_speaker = file[len(file)-10:len(file)-6]
            #print("############# Calculate MFCC and DPGMM for ", current_speaker, " , speaker no ", str(iterator))
            #if iterator == 572:
            #    print("There will be an error here")

            merged_files = np.array([])
            for i in range(0, 9):
                current_file = wav.read(file[:-5]+str(i)+file[-4:])
                merged_files = np.append(merged_files, current_file[1])
            #print(type(merged_files))
            speaker_MFCC = MFCC.extract(merged_files)
            speaker_MFCC = speaker_MFCC[:, 1:]
            #speakers_MFCC_dict[current_speaker] = speaker_MFCC
            g = mixture.DPGMM(n_components=gauss_num, n_iter=100, covariance_type=cov_type, alpha=alpha_val)
            g.fit(speaker_MFCC)
            #speaker_model = np.array([g.means_, g.precs_, np.repeat(g.weights_[:, np.newaxis], 12, 1)])
            #speaker_GMM_dict[current_speaker] = speaker_model
            log_prob = -10000
            winner = 'nobody'
            for test_file in test_files:
                current_test_speaker = test_file[len(test_file)-10:len(test_file)-6]
                current_test_file = wav.read(test_file)
                test_speaker_MFCC = MFCC.extract(current_test_file[1])
                test_speaker_MFCC = test_speaker_MFCC[:, 1:]
                temp_prob = np.mean(g.score(test_speaker_MFCC))
                if temp_prob > log_prob:
                    log_prob = temp_prob
                    winner = current_test_speaker
            if winner == current_speaker:
                good += 1
            else:
                bad += 1
            total +=1
            #print(current_speaker, " speaker no ", str(iterator), " is similar to ", winner, " - log prob = ", str(log_prob))
            #print("good = ", str(good), ", bad = ", str(bad), ", total = ", str(total))
            iterator += 1

    print("DPGMM (covariance_type - ", cov_type, ", alpha - ", str(alpha_val), "), Efficiency = ", str(good/total))
Example No. 3
def calculate_cluster_distance(sample, database):
    distances = list()
    sample_feature_vector = MFCC.voice_feature_extraction(sample)
    for database_sample in os.listdir(database):
        database_feature_vector = MFCC.voice_feature_extraction(
            os.path.join(database, database_sample))
        distances.append(
            DTW.dynamic_time_warping(sample_feature_vector,
                                     database_feature_vector))

    return np.mean(distances)
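
DTW.dynamic_time_warping itself is not shown; below is a minimal sketch of the standard dynamic-programming recurrence such a helper typically implements, assuming two 2-D arrays of per-frame features and a Euclidean frame cost:

import numpy as np

def dynamic_time_warping(a, b):
    # D[i, j] holds the best cost of aligning a[:i] with b[:j]
    n, m = len(a), len(b)
    D = np.full((n + 1, m + 1), np.inf)
    D[0, 0] = 0.0
    for i in range(1, n + 1):
        for j in range(1, m + 1):
            cost = np.linalg.norm(a[i - 1] - b[j - 1])
            D[i, j] = cost + min(D[i - 1, j],      # insertion
                                 D[i, j - 1],      # deletion
                                 D[i - 1, j - 1])  # match
    return D[n, m]
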
Example No. 5
def super_vector(test_file_name, ubm_file):
    wav = mywave()
    waveData = wav.WaveRead(test_file_name)
    waveVadIdx = vad(waveData**2)
    waveData = waveData[waveVadIdx]

    MFCC_obj = MFCC(40, 12, 300, 3400, 0.97, 16000, 50, 0.0256, 256)
    MFCC_coef = MFCC_obj.sig2s2mfc(waveData)

    ubm = GMM(n_mix=128, n_dim=12)
    ubm.read(ubm_file)
    ubm.adapt(MFCC_coef)

    return ubm.means
Example No. 6
def mix_feature(tup):
    mfcc = MFCC.extract(tup)
    lpc = LPC.extract(tup)
    if len(mfcc) == 0:
        print >> sys.stderr, "ERROR.. failed to extract mfcc feature:", len(
            tup[1])
    return np.concatenate((mfcc, lpc), axis=1)
Example No. 7
def main():
    theta1 = loadmat('ml.mat')['theta1']
    theta2 = loadmat('ml.mat')['theta2']
    """Xtest = [];
  ytest = [];

  nspeakers = theta2.shape[0];
  folders = os.listdir("wav")
  for i in range(5):
    folder = folders[i];
    print(folder)
    files = [f for f in glob.glob("wav/"+folder + "/" + "**/*.wav", recursive=True)]
    sztraining = int(len(files)*0.6);
    for fid in range(sztraining, len(files)):
      sample_rate, signal = wav.read(files[fid])
      signal = signal[0:int(2 * sample_rate)]
      mfcc = MFCC.main(signal, sample_rate)
      Xtest.append(mfcc)
      ytest.append(i)
      
  ytest = np.array(ytest)
  Xtest = np.array(Xtest)

  pred = [];
  for i in range(len(Xtest)):
    pred.append(ml.predictWAV(theta1, theta2, Xtest[i])[0])
  print(np.mean(pred == ytest.flatten()) * 100)"""

    signal = []
    sample_rate = 16000

    #th = threading.Thread(target=audio.plot_audio, args=(1,));
    #th.start()
    while True:
        cmd = input("Digite um comando")
        print("CMDZAO = " + str(cmd))
        if cmd == "record":
            seconds = 7
            print("recording...")
            signal = sd.rec(int(seconds * sample_rate),
                            samplerate=sample_rate,
                            channels=1)
            sd.wait()
        elif cmd == "who":
            if not len(signal):
                print("no signal")
                continue
            sd.play(signal, sample_rate)

            signal = signal[0:int(2 * sample_rate)]
            mfcc = MFCC.main(signal, sample_rate)

            mlres = ml.predictWAV(theta1, theta2, mfcc)

            print("user id: {}".format(mlres[0]))
        elif cmd == "exit":
            break
        else:
            print("not found.")
    return 0
Example No. 8
 def Classify (self, sample, verbose = True):
   length = len (sample)
   features = MFCC.extract (numpy.frombuffer (sample, numpy.int16))
   gestures = {}
   minimum = float('inf')
   lowest = None
   for gesture in self.params:
     d = []
     for tsample in self.params[gesture]:
       total_distance = 0
       smpl_length = len(tsample)
       
       if(numpy.abs(length - smpl_length) <= 0):
          continue
       
       for i in range (min (len (features), len (tsample))):
         total_distance += dist.cityblock(features[i], tsample[i])
       
       # normalize by the number of frames actually compared
       d.append (total_distance / float (min (len (features), len (tsample))))
     score = numpy.min(d)
     gestures[gesture] = score
     if(verbose):
         print "Gesture %s: %f" % (gesture, score)
     if score < minimum:
       minimum = score
       lowest = gesture
   if verbose:
      print lowest, minimum
   if(minimum < THRESHOLD):
     return lowest
   else:
     return None
Example No. 9
def test(filename, verbose = False):
    rawdata = loadWAVfile(filename)
    mfcc = MFCC.extract(rawdata, show=False)

    #Test the hmm
    HMM_Model.test(mfcc, verbose)
    return
def get_label_data(directory):
    files = os.listdir(directory)
    # Randomized Test and Train set
    test_files = rand.sample(files, TEST_SIZE)

    train_mfcc = []
    test_mfcc = []

    for f in test_files:
        if f.endswith("wav"):
            test_mfcc.append(MFCC.get_mfcc(os.path.join(directory, f)))
            files.pop(files.index(f))
    
    train_mfcc = [MFCC.get_mfcc(os.path.join(directory, f)) for f in files if f.endswith("wav")]

    return train_mfcc, test_mfcc
Example No. 11
def select_events(nevents,nfeatures):
    global groups
    fftbins = 8192
    featurewidth = 16
    print "Selecting %d random spectral features.." % nfeatures
    feature_bins = np.random.randint(featurewidth/2,(fftbins/8),nfeatures)
    print "Selecting %d random audio events.." % nevents
    events = np.random.randint(0,len(faudio)-grain_mid,nevents)
    # Initialise features array with the first variable as index
    features = np.zeros((14,nevents))
    features[0] = np.arange(0,nevents)
    print "Computing audio event spectrograms.."
    # For each event..
    for i in range(0,nevents):
        # Calculate spectrogram for the event
        _fftevent = faudio[events[i]:min(events[i]+1000,len(faudio))]*sig.hann(1000)
        mfcc = MFCC.extract(_fftevent)
        features[:,i] = np.append(i,mfcc)
        #powerspec = abs(fft(_fftevent,fftbins)) ** 2
        #melspec = np.dot(powerspec,melFilterBank(len(_fftevent)))
        #logspec = np.log(melspec)
        #mfcc = dct(logspec,type=2)
        #print mfcc
        # Calculate each feature for this event
        #for j in range(0,nfeatures):
        #    features[j+1][i] = abs(np.mean(abs(mags[(feature_bins[j]-featurewidth/2):(feature_bins[j]+featurewidth/2)])))
    print "Clustering events with K-Means algorithm.."
    groups = kmeans(np.transpose(features),tracks,minit='points',iter=30)[1]
    return [events,groups]
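
The call signature here (the minit= keyword and taking element [1] of the result) matches scipy.cluster.vq.kmeans2 rather than kmeans, so the import was presumably aliased; a sketch of the assumed import and call:

from scipy.cluster.vq import kmeans2 as kmeans

# kmeans2 returns (centroids, labels); [1] selects the per-event cluster labels
groups = kmeans(np.transpose(features), tracks, minit='points', iter=30)[1]
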
Example No. 12
def get_label_data(label):
    files = os.listdir(label)
    test_mfcc = [
        MFCC.get_mfcc(os.path.join(label, f)) for f in files
        if f.endswith("wav")
    ]
    return test_mfcc
Example No. 13
def GenerateParams (gestures, verbose = True):
  params = {}
  for gesture in gestures:
    if(verbose):
      print "Processing " + gesture
    l = []
    for sample in gestures[gesture]:
      l.append (MFCC.extract (numpy.frombuffer (sample, numpy.int16)))
    params[gesture] = l
  return params
Example No. 14
def train(filename, id):
    rawdata = loadWAVfile(filename)
    mfcc = MFCC.extract(rawdata, show=False)
    model = VQ.Model(id)

    #Train the VQ
    model.train(mfcc)

    #Train the HMM
    create_file(mfcc, id)
    return
Example No. 16
def load():
    names = [
        "Mathematics", "Biology", "PoliticalScience", "Statistics",
        "Psychology"
    ]
    sampledict = {}
    for name in names:
        sampledict[name] = []
        for fname in glob.glob("Samples/" + name + " *"):
            w = wread(fname)
            sampledict[name].append(MFCC.extract(w[1])[:30])
    return names, sampledict
Example No. 17
 def getPerson(self):
     if self.file[0] == '':
         print('empty file!')
         return
     sample_rate, signal = read(self.file[0])
     theta1 = loadmat('ml.mat')['theta1']
     theta2 = loadmat('ml.mat')['theta2']
     mfcc = MFCC.main(signal, sample_rate)
     mlres = ml.predictWAV(theta1, theta2, mfcc)
     self.whoAmIWindow = WhoAmIWindowClass(mlres)
     self.whoAmIWindow.setWindowTitle('Result')
     self.whoAmIWindow.show()
Example No. 18
def add_to_database(parameters, noteNumber, data, orchestra):
    instrument = parameters[2]
    tech = parameters[3]
    dyn = parameters[4]
    #note = parameters[5]
    dyn_db = assignDynamics.assign_dynamics(
        dyn, instrument, dynamics_list
    )  # function parameters: dynamics, instrument name, dynamics list
    data = cutSample(data)
    data = normalize_sound_file.normalize_audio_file(
        data, dyn_db)  #Set sound file level according to the loaded text file
    #print("data normalized")
    #mfcc_data=librosa.feature.mfcc(y=data,sr=fs,n_mfcc=12,win_length=fs)
    M = len(data)  #Length of data (should be 44100)
    spectrum = np.fft.fft(data, axis=0)[:M // 2 + 1:-1]  #Calculate the fft
    #print("spectrum calculated")
    S = np.abs(spectrum)  #Get rid of complex numbers
    S = 20 * np.log10(S)  #dB values of data
    try:
        masking_freq, masking_threshold = maskingCurve.maskingCurve(
            S, noteNumber)  #Calculate the masking curve
    except Exception:
        print("Masking calculation fail, using flat masking curve")
        masking_freq = constants.threshold[:, 0]
        masking_threshold = np.ones(106)
    #print("masking calculated")
    mfcc_data, centroid = MFCC.custom_mfcc(
        data)  #Calculate mfcc and spectral centroid
    #print("mfcc calculated")
    LpcLocs, LpcFreqs = lpc_coeffs.lpc_coeffs(
        data)  #calculate LPC-frequency response
    #print("lpc calculated")
    #Add everything to database (except fft spectrum):
    nested_update(
        orchestra, {
            instrument: {
                tech: {
                    dyn: {
                        noteNumber: {
                            "data": data,
                            "masking_curve": masking_threshold,
                            "masking_locs": masking_freq,
                            "lpc_curve": LpcFreqs,
                            "lpc_locs": LpcLocs,
                            "mfcc": mfcc_data,
                            "centroid": centroid
                        }
                    }
                }
            }
        })
    return orchestra
Example No. 19
 def produce_mfcc(self, filename):
     wav = wave.open(filename, "r")
     x = np.frombuffer(wav.readframes(self.sz), dtype=np.int16)
     #(nchannels, sampwidth, framerate, nframes,
     # comptype, compname) = wav.getparams()
     mfcc = MFCC.extract(x)
     match = self.lab_extractor.match(filename)
     try:
         label = match.group(1)
     except:
         label = "unknown"
         print >> sys.stderr, "unknown labels encountered"
     return (mfcc, label)
Example No. 20
def write_to_csv(location):

    print "omer"
    os.chdir(location)
    for file in glob.glob("*.wav"):
        print "omer"
        mfcc = MFCC.extract_mfcc(file)
        mfcc = np.hstack([np.ones((mfcc.shape[0], 1)), mfcc])

        print mfcc.shape
        with open("/home/omer/Desktop/Echo/Machine_Learning/Data_Gunshot.csv",
                  'a') as f_handle:
            np.savetxt(f_handle, mfcc, delimiter=",")
def calculate_within_cluster_distance(filepath):
    feature_vectors = list()
    samples = os.listdir(filepath)
    for sample in samples:
        feature_vectors.append(
            MFCC.voice_feature_extraction(os.path.join(filepath, sample)))

    distances = list()
    for i in range(0, len(feature_vectors)):
        for j in range(i + 1, len(feature_vectors)):
            distances.append(
                DTW.dynamic_time_warping(feature_vectors[i],
                                         feature_vectors[j]))

    return np.max(distances)
Example No. 22
def test(signal, fs, feat_list):
    mean = np.average(signal)
    energy = np.sum(np.abs(signal - mean))
    signal = signal / energy * 100

    mfcc = MFCC.MFCC(signal, fs, Frame_Len, Hop_Len)
    digit = 0
    dis = DTW.DTW(mfcc, feat_list[0])

    for i in range(1, len(feat_list)):
        c = DTW.DTW(mfcc, feat_list[i])
        if c < dis:
            digit = i
            dis = c

    return digit, dis
Example No. 23
def write_to_csv(fold0, fold_n, location):
    count = 0
    for i in range(fold0, fold_n):
        path = location + str(i)  # e.g. "/home/Desktop/UrbanSound8K/UrbanSound8K/audio/fold" + str(i)
        os.chdir(path)

        print path
        for file in glob.glob("*.wav"):
            if (file.split('-')[1] == '6'):
                mfcc = MFCC.extract_mfcc(file)
                mfcc = np.hstack([np.ones((mfcc.shape[0], 1)), mfcc])

                with open(
                        "/home/omer/Desktop/UrbanSound8K/UrbanSound8K/audio/Data_Gunskhdot.csv",
                        'a') as f_handle:
                    np.savetxt(f_handle, mfcc, delimiter=",")
Example No. 24
	def test_mfcc(self):
		expected = [[
			-1.58999199e+02, 8.34436590e+00, -4.44382643e+01, -1.05713490e+01,
			-4.14216808e+00, 5.43735320e+00, -6.23641973e+00, 1.13643816e+01,
			1.11168843e+01, 2.09593413e+01, 2.08886976e+01, 1.78893376e+01,
			-1.85126261e+00, 1.98630431e+00, -3.58780406e+00, 1.07466142e+01,
			4.06712767e+00, -3.77452706e+00, -9.57172794e+00, 2.71010408e+00,
			2.28370949e-01, -1.67914367e+00, -2.70335598e+00, 9.36659239e+00,
			-1.06643306e+00, -4.19447993e+00, -1.55310523e+00, 9.63509903e+00,
			-2.36770851e+00, 1.16768921e+00, 1.74342284e+00, -6.92783306e-01,
			-2.74215299e+00, 7.46808225e+00, -3.92998483e+00, -1.10826282e+00,
			2.49712828e+00, -1.59097153e+00, -5.17096235e+00, 3.18161592e+00,
			-4.68084505e+00, 4.28643721e+00, -3.98783991e-01, -4.31620744e+00,
			1.85530792e+00, 1.94520311e+00, -3.32610635e+00, 5.60897361e+00,
			-1.59248264e+00, 3.31523211e+00, 3.20098072e-01, 3.58511203e-01,
			3.37264297e+00, -1.70320401e+00, -1.18435935e-01, 1.40946029e+00,
			-4.82136239e+00, 3.66574126e+00, -1.98897953e+00, 1.42700455e+00
		]]
		self.assertListEqual(expected, list(MFCC.extract_mfcc("/home/omer/Desktop/UrbanSound8K/UrbanSound8K/audio/fold1/7061-6-0-0.wav")))
Example No. 25
def runHMM(file_path):
    models = {}
    for label in CLASS_LABELS:
        with open(os.path.join("Models", label + ".pkl"), "rb") as file:
            models[label] = pk.load(file)

    with open("Models/kmeans.pkl", "rb") as file:
        kmeans = pk.load(file)

    sound_mfcc = MFCC.get_mfcc(file_path)
    sound_mfcc = kmeans.predict(sound_mfcc).reshape(-1, 1)

    evals = {
        cname: model.score(sound_mfcc, [len(sound_mfcc)])
        for cname, model in models.items()
    }
    conclusion = max(evals.keys(), key=(lambda k: evals[k]))

    return evals, conclusion
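
A possible call site, assuming the pickled models exist on disk (the WAV path is hypothetical):

evals, conclusion = runHMM("recordings/sample.wav")
print(conclusion, evals[conclusion])
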
Example No. 26
 def get_mfcc_from_melspec(self,
                           melspec,
                           deltamfcc=True,
                           avelocalframes=True,
                           stdlocalframes=True):
     '''Extract MFCC stats from mel-spectrogram.
     '''
     mf = mfc.MFCCs()
     mfcc = mf.get_mfccs_from_melspec(melspec=melspec, melsr=self.framessr)
     if deltamfcc:
         ff = mfcc
         ffdiff = np.diff(ff, axis=1)
         ffdelta = np.concatenate((ffdiff, ffdiff[:, -1, None]), axis=1)
         frames = np.concatenate([ff, ffdelta], axis=0)
         mfcc = frames
     if avelocalframes:
         mfcc = self.average_local_frames(mfcc, getstd=stdlocalframes)
     mfcc = pd.DataFrame(mfcc.T)
     return mfcc
def feature_get(input_files_list,feature_save_list):
    #feature_extractors = {mfcc._extractor,pitch_based._extractor}
    f = open(input_files_list,'r')
    input_audio_files = f.readlines()
    f.close()
    f = open(feature_save_list,'r')
    save_files = f.readlines()
    f.close()
    i = 0
    for audio_file,save_file in zip(input_audio_files,save_files):
        audio_file = audio_file.strip()
        save_file = save_file.strip()
        marks = get_segment_energy_marks(audio_file)
        feature1 = mfcc._extractor(audio_file,n_mfcc=13,n_fft=200,hop_length=80)
        feature2 = pitch_based._extractor(audio_file,window_length = 200,hop_length = 80)
        features = combine_feature([feature1,feature2])
        features = get_segment_feature(features,marks)
        save_features(features,save_file)
        print(i)
        i = i+1
def add_to_database(url_, person_name_):
    gmm_models = {}

    if os.path.isfile('mfcc.mat'):
        gmm_models = sio.loadmat('mfcc.mat')
    print "Recording and processing...\n\n"
    full_sound_model = read_radio_stream(url_)

    wav.write('People\\'+person_name_+'.wav', 11025, full_sound_model/32767.0)

    print "Calculating MFCC and saving the model..."
    mfcc_features = MFCC.extract(full_sound_model)
    mfcc_features = mfcc_features[:, 1:]

    g = mixture.GMM(n_components=128)
    g.fit(mfcc_features)
    model = np.array([g.means_, g.covars_, np.repeat(g.weights_[:, np.newaxis], 12, 1)])  # weights have to be repeated to properly save the np array

    print len(g.means_)

    gmm_models[person_name_] = model
    sio.savemat('mfcc_32.mat', gmm_models, oned_as='row')
Example No. 29
def main_rnn(config):

    x = tensor.tensor3('features')
    y = tensor.matrix('targets')

#    if 'LSTM' in config['model'] :
#        from models import getLSTMstack
#        y_hat = getLSTMstack(input_dim=13, input_var=x, depth=int(config['model'][-1]))
#    else :
#        raise Exception("These are not the LSTM we are looking for")

#    y_hat = model.apply(x)
    

    emitter = TestEmitter()
#    emitter = TrivialEmitter(readout_dim=config['lstm_hidden_size'])

#    cost_func = SquaredError()

 #   @application
 #   def qwe(self, readouts, outputs=None):
 #       print(type(self), type(readouts))
 #       x = cost_func.apply(readouts,outputs)
 #       return x
    print(type(emitter.cost))
 #   emitter.cost = qwe
  #  print(type(qwe))

    steps = 2
    n_samples = config['target_size']

    transition = [LSTM(config['lstm_hidden_size']) for _ in range(4)]
    transition = RecurrentStack(transition,
            name="transition", skip_connections=False)

    source_names = [name for name in transition.apply.states if 'states' in name]

    readout = Readout(emitter, readout_dim=config['lstm_hidden_size'],
                      source_names=source_names, feedback_brick=None,
                      merge=None, merge_prototype=None, post_merge=None,
                      merged_dim=None)

    seqgen = SequenceGenerator(readout, transition, attention=None, add_contexts=False)
    seqgen.weights_init = IsotropicGaussian(0.01)
    seqgen.biases_init = Constant(0.)
    seqgen.push_initialization_config()

    seqgen.transition.biases_init = IsotropicGaussian(0.01,1)
    seqgen.transition.push_initialization_config()
    seqgen.initialize()

    states = seqgen.transition.apply.outputs
    print('states',states)
    states = {name: shared_floatx_zeros((n_samples, config['lstm_hidden_size']))
        for name in states}

    cost_matrix = seqgen.cost_matrix(x, **states)
    cost = cost_matrix.mean()
    cost.name = "nll"

    cg = ComputationGraph(cost)
    model = Model(cost)
    #Cost
#    cost = SquaredError().apply(y_hat ,y)
    #cost = CategoricalCrossEntropy().apply(T.flatten(),Y)
 #   

        #for sampling
    #cg = ComputationGraph(seqgen.generate(n_steps=steps,batch_size=n_samples, iterate=True))
  

    algorithm = GradientDescent(
        cost=cost, parameters=cg.parameters,
        step_rule=Scale(learning_rate=config['learning_rate']))



    #Getting the stream
    train_stream = MFCC.get_stream(config['batch_size'],config['source_size'],config['target_size'],config['num_examples'])


    #Monitoring stuff
    extensions = [Timing(),
                  FinishAfter(after_n_batches=config['num_batches']),
                  #DataStreamMonitoring([cost, error_rate],test_stream,prefix="test"),
                  TrainingDataMonitoring([cost], prefix="train", every_n_batches=1),
                  #Checkpoint(save_to),
                  ProgressBar(),
                  Printing(every_n_batches=1)]
   

    main_loop = MainLoop(
        algorithm,
        train_stream,
 #       model=model,
        extensions=extensions)

    main_loop.run()
Example No. 30
def main():
    folders = os.listdir("wav")
    X = []
    y = []
    Xtest = []
    ytest = []
    nspeakers = 5
    #feature extraction

    for i in range(nspeakers):
        folder = folders[i]
        files = [
            f for f in glob.glob("wav/" + folder + "/" + "**/*.wav",
                                 recursive=True)
        ]

        sztraining = int(len(files) * 0.6)
        for fid in range(sztraining):
            sample_rate, signal = wav.read(files[fid])
            mfcc = MFCC.main(signal, sample_rate)
            for j in range(len(mfcc)):
                X.append([])
                for k in range(len(mfcc[j])):
                    X[-1].append(mfcc[j][k])
                y.append(i)
        for fid in range(sztraining, len(files)):
            sample_rate, signal = wav.read(files[fid])
            mfcc = MFCC.main(signal, sample_rate)
            for j in range(len(mfcc)):
                Xtest.append([])
                for k in range(len(mfcc[j])):
                    Xtest[-1].append(mfcc[j][k])
                ytest.append(i)

    y = np.array(y)
    X = np.array(X)
    ytest = np.array(ytest)
    Xtest = np.array(Xtest)
    input_layer_size = 390
    hidden_layer_size = 200
    num_labels = nspeakers

    lmbda = 1
    initial_theta1 = ml.randInitializeWeights(input_layer_size,
                                              hidden_layer_size)
    initial_theta2 = ml.randInitializeWeights(hidden_layer_size, num_labels)
    nn_initial_params = np.hstack(
        (initial_theta1.ravel(order='F'), initial_theta2.ravel(order='F')))

    print(
        ml.nnCostFunc(nn_initial_params, input_layer_size, hidden_layer_size,
                      num_labels, X, y, lmbda))
    theta_opt = opt.fmin_cg(maxiter=50,
                            f=ml.nnCostFunc,
                            x0=nn_initial_params,
                            fprime=ml.nnGrad,
                            args=(input_layer_size, hidden_layer_size,
                                  num_labels, X, y.flatten(), lmbda))

    theta1_opt = np.reshape(
        theta_opt[:hidden_layer_size * (input_layer_size + 1)],
        (hidden_layer_size, input_layer_size + 1), 'F')
    theta2_opt = np.reshape(
        theta_opt[hidden_layer_size * (input_layer_size + 1):],
        (num_labels, hidden_layer_size + 1), 'F')

    pred = ml.predict(theta1_opt, theta2_opt, Xtest, ytest)
    print(np.mean(pred == ytest.flatten()) * 100)
    savemat('ml.mat', {
        'theta1': theta1_opt,
        'theta2': theta2_opt
    })
Example No. 31
# python -i <name of this .py file>
import numpy as np
from scikits.audiolab import Sndfile

SOUND_DIRECTORY = 'small_data_sample/right_whale'

test_file = '%s/train12.aiff' % SOUND_DIRECTORY

f = Sndfile(test_file, 'r')

# Sndfile instances can be queried for the audio file meta-data
fs = f.samplerate
nc = f.channels
enc = f.encoding

# Reading is straightforward
data = f.read_frames(1000)

# This reads the next 1000 frames, e.g. from 1000 to 2000, but as single precision
data_float = f.read_frames(1000, dtype=np.float32)
print data_float.shape

import MFCC
# data_float is a wave signal saved in a 1-D numpy array
# mfcc is a 2-D numpy array, where each row is the
# MFCC of a frame in data_float
mfcc = MFCC.extract(data_float, show = True)
# This will also plot the MFCC and the spectrogram
# reconstructed from MFCC by inverse DCT

Example No. 33
 def mfcc(self, m, NumFilters=48):
     """
     Compute the Mth Mel-Frequency Cepstral Coefficient
     """
     return MFCC.mfcc(self, m, NumFilters)
def GMM_test(ii):
    speakers_MFCC_dict = {}
    speaker_GMM_dict = {}
    files = glob.glob(os.getcwd()+'\\speakers\\*.wav')
    gauss_num = 32
    iterator = 1
    num_iter = ii


    if os.path.isfile('mfcc_'+str(gauss_num)+'.mat'):
        speaker_GMM_dict = sio.loadmat('mfcc_'+str(gauss_num)+'.mat')
        speaker_GMM_dict.pop('__header__')
        speaker_GMM_dict.pop('__version__')
        speaker_GMM_dict.pop('__globals__')
    else:
        for file in files:
            #print(file)
            if file[-6:-4] == '00':   #file[len(file)-12:len(file)-9]
                current_speaker = file[len(file)-10:len(file)-6]
                print("############# Calculate MFCC and GMM for ", current_speaker, " , speaker no ", str(iterator))
                #if iterator == 572:
                #    print("There will be an error here")

                iterator += 1
                merged_files = np.array([])
                for i in range(0, 9):
                    current_file = wav.read(file[:-5]+str(i)+file[-4:])
                    merged_files = np.append(merged_files, current_file[1])
                #print(type(merged_files))
                speaker_MFCC = MFCC.extract(merged_files)
                speaker_MFCC = speaker_MFCC[:, 1:]

                speakers_MFCC_dict[current_speaker] = speaker_MFCC
                g = mixture.GMM(n_components=gauss_num, n_iter=num_iter)
                g.fit(speaker_MFCC)

                speaker_model = np.array([g.means_, g.covars_, np.repeat(g.weights_[:, np.newaxis], 12, 1)])
                speaker_GMM_dict[current_speaker] = speaker_model


        sio.savemat('mfcc_'+str(gauss_num)+'.mat', speaker_GMM_dict, oned_as='row')


    iterator = 1
    good = 0
    bad = 0
    total = 0

    for file in files:
        if file[-6:-4] == '09':
            g = mixture.GMM(n_components=gauss_num, n_iter=num_iter)
            current_file = wav.read(file)
            current_speaker = file[len(file)-10:len(file)-6]
            #print(current_speaker, )
            speaker_MFCC = MFCC.extract(current_file[1])
            speaker_MFCC = speaker_MFCC[:, 1:]
            log_prob = -10000
            winner = 'nobody'
            for key, values in speaker_GMM_dict.items():
                try:
                    g.means_ = values[0, :, :]
                    g.covars_ = values[1, :, :]
                    g.weights_ = values[2, :, 1]
                    temp_prob = np.mean(g.score(speaker_MFCC))
                    if temp_prob > log_prob:
                        log_prob = temp_prob
                        winner = key
                except TypeError:
                    print('error for ', key)
            if current_speaker == winner:
                good += 1
            else:
                bad += 1
            total +=1
            print(current_speaker, " speaker no ", str(iterator), " is similar to ", winner, " - log prob = ", str(log_prob))
            print("good = ", str(good), ", bad = ", str(bad), ", total = ", str(total))
            iterator += 1

    print("GMM, n_iter = ", num_iter, ", Efficiency = ", str(good/total))
Example No. 35
def get_mfcc_worker(fpath):
    print('mfcc: ' + fpath)
    fs, signal = wavfile.read(fpath)
    mfcc = MFCC.extract(fs, signal)
    return mfcc[:1500]
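
The _worker suffix suggests this function is meant to be mapped over many files in parallel; a sketch under that assumption (wav_paths is a hypothetical list of file paths):

from multiprocessing import Pool

with Pool() as pool:
    mfccs = pool.map(get_mfcc_worker, wav_paths)
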
Example No. 37
def collect(n=20):
    obs = []
    for i in xrange(n):
        os.system("arecord -f S16_LE --rate=44100 -D hw:1,0 -d 3 test.wav")
        obs.append(MFCC.extract(wavfile.read("test.wav")[1]))
    return obs
Example No. 38
def template(signal, fs):
    mean = np.average(signal)
    energy = np.sum(np.abs(signal - mean))
    signal = signal / energy * 100
    mfcc = MFCC.MFCC(signal, fs, Frame_Len, Hop_Len)
    return mfcc
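
template() produces the reference features consumed by test() in Example No. 22; a usage sketch, assuming one reference recording per digit (the signal arrays and fs are hypothetical):

feat_list = [template(ref_signal, fs) for ref_signal in reference_signals]
digit, dis = test(unknown_signal, fs, feat_list)
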
import MFCC
import os
name_list = ["Uesaka_Sumire_Anime","Komatsu_Mikako_Anime","Okubo_Rumi_Anime","Takamori_Natsumi_Anime","Mikami_Shiori_Anime"]

for g in name_list:
	files = os.listdir("./" + g + "/")
	for f in files:
		MFCC.create_ceps("./" + g + "/" + f)
Example No. 40
 def mfcc2(self, numFilters = 32):
     """
     Vectorized MFCC implementation
     """
     return MFCC.mfcc2(self, numFilters)
Example No. 41
add_to_database(fileParts, noteN, data)
#print(data.shape)
#print(data)
#sd.play(data, fs)
inst = 'alto_flute'
orchestra = {inst: {'data': data}}

M = len(data)
spectrum = np.fft.fft(orchestra[inst]['data'], axis=0)[:M // 2 + 1:-1]
spectrum = np.abs(spectrum)
S = 20 * np.log10(spectrum)
frq = 30

#mfcc_data=librosa.feature.mfcc(y=data,sr=rate,n_mfcc=12,n_fft=int(M),hop_length=int(M+2))[:,0]
mfcc_data, centroid = MFCC.custom_mfcc(data)
LpcLocs, LpcFreqs = lpc_coeffs.lpc_coeffs(data)

# LPC=librosa.lpc(data, lpc_coeffs)
# f,h=freqz(1,LPC, worN=lpc_coeffs, fs=fs)
# h=20 * np.log10(np.abs(h))

A = np.linspace(0, len(spectrum), 101)
#print("mfccs:")
#print(mfcc_data.shape)
#print(mfcc_data)
#print(centroid)
#peaks, _ = findPeaks(S, distance=frq, prominence=20, height=-10)
idx, peaks = findPeaks.peaks(S, noteN)
frq, thr = maskingCurve.maskingCurve(S, noteN)
#peaks = find_peaks_cwt(S,np.arange(1,fs/2+1))
Example No. 42
    speech = Speech()
    xx, fs = speech.audioread(filename, 8000)
    xx = xx - np.mean(xx)  # remove DC component
    x = xx / np.max(xx)  # amplitude normalization
    N = len(x)
    time = np.arange(N) / fs
    noisy = Noisy()
    signal, _ = noisy.Gnoisegen(x, SNR)  # add noise
    wnd = np.hamming(wlen)  # window function
    overlap = wlen - inc
    NIS = int((IS * fs - wlen) / inc + 1)  # number of leading noise-only (unvoiced) frames
    y = speech.enframe(signal, list(wnd), inc).T
    fn = y.shape[1]  # frame number
    frameTime = speech.FrameTime(fn, wlen, inc, fs)  # frame to time

    Mfcc = MFCC()
    ccc = Mfcc.mfcc(signal, fs, 16, wlen, inc)  # MFCC
    fn1 = ccc.shape[0]  # frame number
    frameTime1 = frameTime[2:fn - 2]
    Ccep = ccc[:, 0:16]  # MFCC coefficient
    C0 = np.mean(
        Ccep[0:5, :],
        axis=0)  # calculate approximate average noise MFCC coefficient
    Dcep = np.zeros(fn)
    for i in range(5, fn1):
        Cn = Ccep[i, :]  # one frame MFCC cepstrum coefficient
        Dstu = 0
        for k in range(16):  # calculate the MFCC cepstrum distance
            Dstu += (Cn[k] - C0[k])**2  # between each frame and noise
        Dcep[i] = np.sqrt(Dstu)
    Dcep[0:5] = Dcep[5]
	def CalculateMFCCs(self):
		# This function calculates and returns the MFCC from the given wavfile
		mfccs = MFCC.extract(self.wav_data)
		return mfccs
Example No. 45
 ubm_dir = 'train_data_for_UBM'
 ubm_data_dirs = os.listdir(ubm_dir)
 dim = 12
 sig = np.array([])
 features_M = np.ndarray(shape=(0, dim), dtype='float64')
 features_F = np.ndarray(shape=(0, dim), dtype='float64')
 features = np.ndarray(shape=(0, dim), dtype='float64')
 wav = mywave()
 print 'hello'
 for ubm_data_dir in ubm_data_dirs:
     print 'hello'
     print ubm_data_dir
     if ubm_data_dir == '.DS_Store':
         continue
     sig = wav.WaveRead(ubm_dir + r'/' + ubm_data_dir)
     MFCC_obj = MFCC(40, 12, 300, 3400, 0.97, 16000, 50, 0.0256, 256)
     MFCC_coef = MFCC_obj.sig2s2mfc(sig)
     #energy = np.ndarray(shape = (MFCC_coef.shape[0],1),dtype = 'float64')
     #energy[:,0] = 10*numpy.log10((MFCC_coef**2).sum(axis=1))
     #MFCC_coef = np.hstack((MFCC_coef,energy))
     """
     dtm1 = np.ndarray(shape = MFCC_coef.shape,dtype = 'float64' ) 
     # initialize dtm1
     dtm1[0:2,:] = 0
     dtm1[MFCC_coef.shape[0]-2:MFCC_coef.shape[0],:] = 0;  
     # compute dtm1
     for loop2 in range(2,MFCC_coef.shape[0]-2):
         dtm1[loop2,:] = -2*MFCC_coef[loop2-2,:]-MFCC_coef[loop2-1,:]+MFCC_coef[loop2+1,:]+2*MFCC_coef[loop2+2,:]
     dtm1 = dtm1/3;
     dtm2 = np.ndarray(shape = MFCC_coef.shape,dtype = 'float64' )
     # initialize dtm2
def read_radio_stream(url_):

    database = sio.loadmat('mfcc_16_fft256_GMM.mat')
    database.pop('__header__')
    database.pop('__version__')
    database.pop('__globals__')

    r2 = urllib.urlopen(url_)
    pygame.mixer.init(44100, -16, 2, 2048)
    print pygame.mixer.get_init()
    chan1 = pygame.mixer.find_channel()

    format = sound.AFMT_S16_LE
    print sound.getODevices()
    #snd_out = sound.Output(44100, 2, format)

    dm = muxer.Demuxer('mp3')
    dec = None
    snd = None

    print(r2.info())
    print('###################\n')

    #f = open('radio.mp3', 'wb')
    #g = open('radio.wav', 'wb')
    i = 0
    while True:  #i < 3:

        samples = r2.read(15000)

        frames = dm.parse(samples)

        if dec is None:
            # Open decoder
            dec = acodec.Decoder(dm.streams[0])
        

        #start = time.time()
        sound_np_array = ansic_to_numpy(frames, dec)
        #print (sound_np_array.shape[0])/44100.0
        #elapsed = (time.time() - start)
        #print 'decode and ndaray - %2.8f' %elapsed
        
        #start = time.time()
        to_play = np.array(np.repeat(sound_np_array[:, np.newaxis], 2, 1), dtype = 'int16')
        sounds = pygame.sndarray.make_sound(to_play)
        chan1.queue(sounds)
        #elapsed = (time.time() - start)
        #print 'to play - %2.8f' %elapsed

        #start = time.time()
        sound_np_array = decimate(sound_np_array, 4)
        #elapsed = (time.time() - start)
        #print 'downsample - %2.8f' %elapsed

        #start = time.time()
        mfcc_features = MFCC.extract(sound_np_array) #1.5s
        mfcc_features = mfcc_features[:, 1:]
        #elapsed = (time.time() - start)
        #print 'mfcc - %2.8f' %elapsed


        g = mixture.GMM(n_components=16)
        log_prob = -10000
        winner = 'nobody'

        for key, values in database.iteritems():
            try:
                g.means_ = values[0, :, :]
                g.covars_ = values[1, :, :]
                g.weights_ = values[2, :, 1]
                
                #start = time.time()
                temp_prob = np.mean(g.score(mfcc_features))
                #elapsed = (time.time() - start)
                #print 'log-likelihood - %2.8f' %elapsed
                
                if temp_prob > log_prob:
                    log_prob = temp_prob
                    winner = key
            except TypeError:
                print 'error for ', key

        print winner, log_prob

    print('\n###################')
Example No. 48

if __name__ == '__main__':
	print __doc__

	ubms_dir = 'ubms'
	speaker_model_dir = 'adaption'
	if not os.path.exists(speaker_model_dir):
		os.mkdir(speaker_model_dir)

	train_data_dir = 'train_data'
	train_data = os.listdir(train_data_dir)
	wav = mywave.mywave()
	for train_wav in train_data:
		print train_wav
		wave_data = wav.WaveRead(train_data_dir+r'/'+train_wav)
		MFCC_obj = MFCC(40,12,300,3400,0.97,16000,50,0.0256,256)
		MFCC_coef = MFCC_obj.sig2s2mfc(wave_data)
		adapted_gmm = GMM()
		if train_wav[-5] == 'M':
			adapted_gmm.read(ubms_dir+r'/ubm_M')
		elif train_wav[-5] == 'F':
			adapted_gmm.read(ubms_dir+r'/ubm_F')
		else:
			print 'train_wav name unexpected'

		adapted_gmm.adapt(MFCC_coef)
		adapted_gmm.write(speaker_model_dir+r'/'+train_wav)

