def main():
    """Score every utterance found under ``eval`` with a pickled GMM pair.

    The model file defaults to ``GMM_model.pkl`` and can be overridden by the
    first command-line argument. It must unpickle to six items: weights,
    means and covariances of the target GMM followed by the same three for the
    non-target GMM.

    For each utterance (in sorted key order) one line is printed:
    ``<name> <log-likelihood-ratio score> <decision>`` where the decision is
    ``1`` when the score is non-negative and ``0`` otherwise.
    """
    import os

    check_dir('eval')
    test = wav16khz2mfcc('eval')

    # Uniform a-priori probabilities of the two classes.
    P_t = 0.5
    P_nt = 1 - P_t

    # Choose one model: CLI argument wins over the default file name.
    fname = sys.argv[1] if len(sys.argv) > 1 else 'GMM_model.pkl'

    # SECURITY NOTE: unpickling executes code embedded in the file, so only
    # point this script at model files you created or otherwise trust.
    with open(fname, 'rb') as f:
        Ws_t, MUs_t, COVs_t, Ws_nt, MUs_nt, COVs_nt = pickle.load(f)

    for tst in sorted(test.keys()):
        ll_t = logpdf_gmm(test[tst], Ws_t, MUs_t, COVs_t)
        ll_nt = logpdf_gmm(test[tst], Ws_nt, MUs_nt, COVs_nt)
        scr = (sum(ll_t) + np.log(P_t)) - (sum(ll_nt) + np.log(P_nt))

        # basename() copes with the platform's path separator (the previous
        # split("/") left the directory in place for backslash paths); the
        # name is still cut at the first dot, exactly as before.
        name = os.path.basename(tst).split(".")[0]
        print(name, scr, 1 if scr >= 0 else 0)
def classification(evalData, trainClasses, weights, meanValues, covarMatrices,
                   threshold=500, output_path="audio_GMM.txt"):
    """Score evaluation recordings against the trained GMMs and write results.

    For every key of ``evalData`` the summed GMM log-likelihood is computed
    for each class in ``trainClasses``; the soft score is the target minus
    non-target log-likelihood (both with a 0.5 prior). One line per recording
    is written to ``output_path``: ``<name> <soft score> <hard decision>``.

    Args:
        evalData: mapping from recording path to the data passed to
            ``logpdf_gmm``. The name is taken from the 4th ``/``-separated
            path component, cut at the first dot.
        trainClasses: iterable of class labels; must contain ``"target"`` and
            ``"non-target"``.
        weights, meanValues, covarMatrices: per-class GMM parameters, indexed
            by class label.
        threshold: soft score that must be exceeded for a ``1`` decision.
        output_path: file the results are written to.
    """
    print("STEP5: Classification started.")
    log_prior = np.log(0.5)

    # The context manager closes the results file even when scoring raises,
    # which the previous open()/close() pair did not guarantee.
    with open(output_path, "w") as results:
        # For every person to evaluate, calculate the sum of LLs for evaluation data
        for evalPerson in evalData:
            # NOTE(review): index 3 assumes a fixed directory depth in the key.
            name = evalPerson.split('/')[3].split('.')[0]
            print("Classifing person ", name)

            llVals = {
                trainPerson: sum(
                    logpdf_gmm(evalData[evalPerson], weights[trainPerson],
                               meanValues[trainPerson],
                               covarMatrices[trainPerson]))
                for trainPerson in trainClasses
            }
            softScore = ((llVals["target"] + log_prior)
                         - (llVals["non-target"] + log_prior))

            # Hard decision
            decision = '1' if softScore > threshold else '0'
            results.write(name + ' ' + str(softScore) + ' ' + decision + '\n')

    print("STEP5 Done: Classification ended.\n")
    print("Classification is finished. Check file '{}' for the results."
          .format(output_path))
ws2 = np.ones(m2) / m2

# Run 30 iterations of the EM algorithm to train the two GMM models. Each
# iteration first draws the data and the current components, then updates both
# models.
# NOTE(review): the original layout was lost; plt.show() is assumed to belong
# to the loop body (one plot per iteration) — confirm.
for i in range(30):
    plt.plot(x1[:, 0], x1[:, 1], 'r.', x2[:, 0], x2[:, 1], 'b.')
    for colour, gmm in (('r', (ws1, mus1, covs1)), ('b', (ws2, mus2, covs2))):
        for w, m, c in zip(*gmm):
            gellipse(m, c, 100, colour, lw=round(w * 10))
    ws1, mus1, covs1, ttl1 = train_gmm(x1, ws1, mus1, covs1)
    ws2, mus2, covs2, ttl2 = train_gmm(x2, ws2, mus2, covs2)
    print('Total log-likelihood: %s for class X1; %s for class X2' % (ttl1, ttl2))
    plt.show()


def hard_decision(x):
    """Return True where class X1 has the higher log joint likelihood."""
    joint1 = logpdf_gmm(x, ws1, mus1, covs1) + np.log(p1)
    joint2 = logpdf_gmm(x, ws2, mus2, covs2) + np.log(p2)
    return joint1 > joint2


# Hard decision over the plotting area, overlaid with data and components.
plot2dfun(hard_decision, ax, 500)
plt.plot(x1[:, 0], x1[:, 1], 'r.')
plt.plot(x2[:, 0], x2[:, 1], 'b.')
for colour, gmm in (('r', (ws1, mus1, covs1)), ('b', (ws2, mus2, covs2))):
    for w, m, c in zip(*gmm):
        gellipse(m, c, 100, colour, lw=round(w * 10))
plt.figure()


def x1_posterior(x):
    """Return the logistic sigmoid of the X1-vs-X2 log joint likelihood ratio."""
    return logistic_sigmoid(logpdf_gmm(x, ws1, mus1, covs1) + np.log(p1)
                            - logpdf_gmm(x, ws2, mus2, covs2) - np.log(p2))


# Same overlay for the soft (posterior) decision, on the new figure.
plot2dfun(x1_posterior, ax, 500)
plt.plot(x1[:, 0], x1[:, 1], 'r.')
plt.plot(x2[:, 0], x2[:, 1], 'b.')
for colour, gmm in (('r', (ws1, mus1, covs1)), ('b', (ws2, mus2, covs2))):
    for w, m, c in zip(*gmm):
        gellipse(m, c, 100, colour, lw=round(w * 10))
plt.show()
Ws_m = np.ones(M_m) / M_m

# Initialize parameters of the female model: M_f randomly chosen training
# frames as means, the global per-dimension variance as every component's
# covariance, and uniform weights.
M_f = 5
# randint's lower bound is inclusive and its upper bound exclusive, so start
# at 0: with a lower bound of 1 the first frame could never be drawn and a
# single-frame training set raised ValueError.
MUs_f = train_f[randint(0, len(train_f), M_f)]
COVs_f = [np.var(train_f, axis=0)] * M_f
Ws_f = np.ones(M_f) / M_f

# Run 30 iterations of EM algorithm to train the two GMMs from males and females
for jj in range(30):
    Ws_m, MUs_m, COVs_m, TTL_m = train_gmm(train_m, Ws_m, MUs_m, COVs_m)
    Ws_f, MUs_f, COVs_f, TTL_f = train_gmm(train_f, Ws_f, MUs_f, COVs_f)
    print('Iteration:', jj, ' Total log-likelihood:', TTL_m, 'for males;', TTL_f, 'for frmales')

# Run recognition for all male test utterances, then for all female ones.
# Each score is the male-vs-female log-likelihood ratio including the priors;
# one list of scores is printed per test set.
for test_set in (test_m, test_f):
    score = []
    for tst in test_set:
        ll_m = logpdf_gmm(tst, Ws_m, MUs_m, COVs_m)
        ll_f = logpdf_gmm(tst, Ws_f, MUs_f, COVs_f)
        score.append((sum(ll_m) + np.log(P_m)) - (sum(ll_f) + np.log(P_f)))
    print(score)
def _plot_posterior_and_ll(ll_m, ll_f, P_m, P_f):
    """Plot class posteriors and the raw log-likelihoods in two new figures."""
    posterior_m = np.exp(ll_m) * P_m / (np.exp(ll_m) * P_m + np.exp(ll_f) * P_f)
    plt.figure()
    plt.plot(posterior_m, 'b')
    plt.plot(1 - posterior_m, 'r')
    plt.figure()
    plt.plot(ll_m, 'b')
    plt.plot(ll_f, 'r')


def _gmm_llr(tst, gmm_m, gmm_f, P_m, P_f):
    """Return the log-likelihood ratio (priors included) of one utterance."""
    ll_m = logpdf_gmm(tst, *gmm_m)
    ll_f = logpdf_gmm(tst, *gmm_f)
    return (sum(ll_m) + np.log(P_m)) - (sum(ll_f) + np.log(P_f))


def main():
    """Train target / non-target GMMs on MFCC features, evaluate and save them.

    Steps, in order:
      1. Load features from the four module-level data paths (positive =
         target, negative = non-target; train and test for each).
      2. Exploratory single-Gaussian experiments (PCA / LDA projections,
         posterior plots, log-likelihood-ratio scores). Their results are not
         used by the later steps.
      3. Train a 12-component target GMM and a 7-component non-target GMM
         with 100 EM iterations.
      4. Print the fraction of correctly classified target and non-target
         test utterances (decision boundary at score 0).
      5. Pickle ``[Ws, MUs, COVs]`` of both models into ``GMM_model.pkl``.
    """
    for path in (negative_test_path, negative_train_path,
                 positive_test_path, positive_train_path):
        check_dir(os.path.dirname(path))

    train_m = np.vstack(list(wav16khz2mfcc(positive_train_path).values()))
    train_f = np.vstack(list(wav16khz2mfcc(negative_train_path).values()))
    test_m = list(wav16khz2mfcc(positive_test_path).values())
    test_f = list(wav16khz2mfcc(negative_test_path).values())

    dim = train_m.shape[1]

    # PCA: project onto the two eigenvectors with the largest eigenvalues.
    # `subset_by_index` replaces the `eigvals` keyword, which SciPy deprecated
    # and current releases no longer accept.
    cov_tot = np.cov(np.vstack([train_m, train_f]).T, bias=True)
    _, e = scipy.linalg.eigh(cov_tot, subset_by_index=[dim - 2, dim - 1])
    train_m_pca = train_m.dot(e)  # not used further below
    train_f_pca = train_f.dot(e)  # not used further below
    # Classes are not well separated in 2D PCA subspace

    # LDA: generalized eigenproblem of across-class vs. within-class
    # covariance; keep only the leading direction.
    n_m = len(train_m)
    n_f = len(train_f)
    cov_wc = (n_m * np.cov(train_m.T, bias=True)
              + n_f * np.cov(train_f.T, bias=True)) / (n_m + n_f)
    cov_ac = cov_tot - cov_wc
    _, e = scipy.linalg.eigh(cov_ac, cov_wc, subset_by_index=[dim - 1, dim - 1])

    # Lets define uniform a-priori probabilities of classes:
    P_m = 0.5
    P_f = 1 - P_m

    # Gaussians parameterized by per-dimension variances; the result is
    # overwritten by the full-covariance version below before it is plotted.
    ll_m = logpdf_gauss(test_m[0], np.mean(train_m, axis=0), np.var(train_m, axis=0))
    ll_f = logpdf_gauss(test_m[0], np.mean(train_f, axis=0), np.var(train_f, axis=0))
    posterior_m = np.exp(ll_m) * P_m / (np.exp(ll_m) * P_m + np.exp(ll_f) * P_f)

    # Gaussian models estimated by train_gauss, trained once and reused for
    # every experiment that follows.
    mean_m, cov_m = train_gauss(train_m)
    mean_f, cov_f = train_gauss(train_f)

    # First a target test utterance ...
    _plot_posterior_and_ll(logpdf_gauss(test_m[0], mean_m, cov_m),
                           logpdf_gauss(test_m[0], mean_f, cov_f), P_m, P_f)
    # ... now testing a non-target utterance with the same models.
    _plot_posterior_and_ll(logpdf_gauss(test_f[1], mean_m, cov_m),
                           logpdf_gauss(test_f[1], mean_f, cov_f), P_m, P_f)

    # Single-Gaussian recognition of the target test set (scores unused).
    score = []
    for tst in test_m:
        ll_m = logpdf_gauss(tst, mean_m, cov_m)
        ll_f = logpdf_gauss(tst, mean_f, cov_f)
        score.append((sum(ll_m) + np.log(P_m)) - (sum(ll_f) + np.log(P_f)))

    # Run recognition with 1-dimensional LDA projected data (scores unused).
    score = []
    mean_m, cov_m = train_gauss(train_m.dot(e))
    mean_f, cov_f = train_gauss(train_f.dot(e))
    for tst in test_m:
        ll_m = logpdf_gauss(tst.dot(e), mean_m, np.atleast_2d(cov_m))
        ll_f = logpdf_gauss(tst.dot(e), mean_f, np.atleast_2d(cov_f))
        score.append((sum(ll_m) + np.log(P_m)) - (sum(ll_f) + np.log(P_f)))

    # GMM initialization: random training frames as means, global variance
    # as covariance, uniform weights. The lower bound of randint is
    # inclusive, so it must be 0 for the first frame to be eligible.
    M_m = 12
    MUs_m = train_m[randint(0, len(train_m), M_m)]
    COVs_m = [np.var(train_m, axis=0)] * M_m
    Ws_m = np.ones(M_m) / M_m

    M_f = 7
    MUs_f = train_f[randint(0, len(train_f), M_f)]
    COVs_f = [np.var(train_f, axis=0)] * M_f
    Ws_f = np.ones(M_f) / M_f

    for jj in range(100):
        Ws_m, MUs_m, COVs_m, TTL_m = train_gmm(train_m, Ws_m, MUs_m, COVs_m)
        Ws_f, MUs_f, COVs_f, TTL_f = train_gmm(train_f, Ws_f, MUs_f, COVs_f)

    gmm_m = (Ws_m, MUs_m, COVs_m)
    gmm_f = (Ws_f, MUs_f, COVs_f)

    # Target utterances are correct when the score is non-negative.
    score = [_gmm_llr(tst, gmm_m, gmm_f, P_m, P_f) for tst in test_m]
    testok = sum(1 for scr in score if scr >= 0)
    print("target is " + str(testok / len(score)))

    # Non-target utterances are correct when the score is negative.
    score = [_gmm_llr(tst, gmm_m, gmm_f, P_m, P_f) for tst in test_f]
    testok = sum(1 for scr in score if scr < 0)
    print("non target is " + str(testok / len(score)))

    with open('GMM_model.pkl', 'wb') as f:
        pickle.dump([Ws_m, MUs_m, COVs_m, Ws_f, MUs_f, COVs_f], f)
    # Report success only after the file has actually been written.
    print('Saved as "GMM_model.pkl"')
covs[index] = [cov[index]] * m ws[index] = np.ones(m) / m ttl = [None] * len(train_sample) # Run 110 iterations of EM algorithm to train GMM models for i in range(110): for index in xrange(len(train_sample)): ws[index], mus[index], covs[index], ttl[index] = train_gmm( train_sample[index], ws[index], mus[index], covs[index]) ll_c = [None] * 2 ll_c_new = [None] * 2 f = open("gmm_speech.txt", "w") for test in xrange(len(real_test_sample)): for index in xrange(len(train_sample)): ll_c[index] = sum( logpdf_gmm(real_test_sample[test], ws[index], mus[index], covs[index])) + np.log(P_c[index]) ll_index_max = np.argmax(ll_c) prob_1 = 1 - (ll_c[0] / (ll_c[0] + ll_c[1])) real_test_name[test] = real_test_name[test].replace(".wav", "") f.write(real_test_name[test].replace("data/eval/", "") + ' ' + str(prob_1) + ' ' + ("1" if ll_index_max == 0 else "0") + "\n") f.close()
n = 15 for iteration in range(n): [Ws[i], MUs[i], COVs[i], TTL] = train_gmm(train, Ws[i], MUs[i], COVs[i]) print("Training iteration: {}/{}, total log-likelihood: {}".format(iteration + 1, n, TTL)) errors = 0 trials = 0 for i in range(NUM_CLASSES): id = i + 1 test = list(wav16khz2mfcc("dev/{}".format(id)).values()) for j, test_data in enumerate(test): log_lh = [] for ii in range(NUM_CLASSES): log_lh.append(sum(logpdf_gmm(test_data, Ws[ii], MUs[ii], COVs[ii]))) winning_class_ind = np.argmax(log_lh) print("Correct class {} | Winning class {} with value {}".format(i + 1, winning_class_ind + 1, log_lh[winning_class_ind])) errors = (errors + 1) if not ((winning_class_ind) == i) else errors trials += 1 print("------------------------------------------") print("False predictions: {} out of {}.".format(errors, trials)) print("Error ratio: {}".format(errors/trials)) print("------------------------------------------") print("Model evaluation...") with open("GMM_audio_results", "w") as f: eval = wav16khz2mfcc("eval/") eval_names = [x.split("\\")[1].split(".")[0] for x in list(eval.keys())] eval_vals = list(eval.values())
avg_eng = summ / len(test[j]) #plt.plot(train[i][0][:,0]) #plt.show() for k in range(0, len(test[j])): test[j][k][0] = test[j][k][0] - avg_eng cnt = 0 for tst in test: ll = [] for j in range(1, 32): ll.append(sum(logpdf_gmm(tst, Ws[j], MUs[j], COVs[j]))) final += str(f[cnt]) final += ' ' final += str(np.argmax(ll) + 1) final += ' ' for z in range(1, 32): final += str((sum(logpdf_gmm(tst, Ws[z], MUs[z], COVs[z])))) final += ' ' final += '\n' cnt = cnt + 1 #score.append(i == (np.argmax(ll) + 1)) output = 'output_voice'
# Target model: prior, number of components and initial parameters (random
# training frames as means, global per-dimension variance, uniform weights).
P_t = 0.5
M_t = 64
# randint's lower bound is inclusive and the upper bound exclusive, so 0 is
# the correct start; a lower bound of 1 could never select the first frame.
MUs_t = t_train_sound[numpy.random.randint(0, len(t_train_sound), M_t)]
COVs_t = [numpy.var(t_train_sound, axis=0)] * M_t
Ws_t = numpy.ones(M_t) / M_t

# Non-target model, initialized the same way with the same component count.
P_nt = 1 - P_t
M_nt = M_t
MUs_nt = nt_train_sound[numpy.random.randint(0, len(nt_train_sound), M_nt)]
COVs_nt = [numpy.var(nt_train_sound, axis=0)] * M_nt
Ws_nt = numpy.ones(M_nt) / M_nt

# EM training of both models.
n = 32
for i in range(n):
    Ws_t, MUs_t, COVs_t, TTL_t = train_gmm(
        t_train_sound, Ws_t, MUs_t, COVs_t)
    Ws_nt, MUs_nt, COVs_nt, TTL_nt = train_gmm(
        nt_train_sound, Ws_nt, MUs_nt, COVs_nt)
    print("Training iteration: " + str(i + 1) + "/" + str(n))

# One result line per evaluation item: "<name> <score> <decision>", where the
# score is the target-vs-non-target log-likelihood ratio including priors and
# the decision is 1 for a strictly positive score.
with open("GMM_speech_results", "w") as f:
    # The loop variable is no longer called `eval`, which hid the builtin.
    for i, eval_features in enumerate(eval_sound):
        ll_t = logpdf_gmm(eval_features, Ws_t, MUs_t, COVs_t)
        ll_nt = logpdf_gmm(eval_features, Ws_nt, MUs_nt, COVs_nt)
        val = (sum(ll_t) + numpy.log(P_t)) - (sum(ll_nt) + numpy.log(P_nt))
        f.write(eval_sound_names[i] + " " + str(val) + " "
                + ("1" if val > 0 else "0") + "\n")