def trainGMMs(trainClasses, gaussWeights, meanValsGauss, covarMatrices):
    print("STEP3: Training all train data using GMM.")
    # In each iteration, we have to make one training step for each training class
    for iteration in range(TRAINING_ITERATIONS):
        print("Training iteration: ", iteration)
        # Calculate new parameter values for every class
        for singleClass in trainClasses:
            gaussWeights[singleClass], meanValsGauss[singleClass], covarMatrices[singleClass], TTL = train_gmm(
                trainClasses[singleClass], gaussWeights[singleClass],
                meanValsGauss[singleClass], covarMatrices[singleClass])
    print("STEP3 Done: Training finished.\n")
    return gaussWeights, meanValsGauss, covarMatrices
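# For reference, a minimal sketch of the single EM step that train_gmm is
# assumed to perform here (ikrlib-style signature, diagonal covariances);
# the real implementation comes from the helper library, this is not it.
import numpy as np
from scipy.special import logsumexp

def train_gmm_sketch(x, ws, mus, covs):
    # E-step: per-frame, per-component log p(x, c) under the current model
    log_joint = np.array([np.log(w)
                          - 0.5 * (np.sum(np.log(2 * np.pi * v))
                                   + np.sum((x - m) ** 2 / v, axis=1))
                          for w, m, v in zip(ws, mus, covs)]).T
    frame_ll = logsumexp(log_joint, axis=1)        # per-frame log-likelihood
    gamma = np.exp(log_joint - frame_ll[:, None])  # responsibilities
    # M-step: re-estimate weights, means and variances from the soft counts
    counts = gamma.sum(axis=0)
    ws_new = counts / len(x)
    mus_new = gamma.T.dot(x) / counts[:, None]
    covs_new = [gamma[:, c].dot((x - mus_new[c]) ** 2) / counts[c]
                for c in range(len(ws))]
    return ws_new, mus_new, covs_new, frame_ll.sum()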
ws1 = np.ones(m1) / m1

m2 = 2
mus2 = x2[randint(1, len(x2), m2)]
covs2 = [cov2] * m2
ws2 = np.ones(m2) / m2

# Run 30 iterations of the EM algorithm to train the two GMM models
for i in range(30):
    plt.plot(x1[:, 0], x1[:, 1], 'r.', x2[:, 0], x2[:, 1], 'b.')
    for w, m, c in zip(ws1, mus1, covs1):
        gellipse(m, c, 100, 'r', lw=round(w * 10))
    for w, m, c in zip(ws2, mus2, covs2):
        gellipse(m, c, 100, 'b', lw=round(w * 10))
    ws1, mus1, covs1, ttl1 = train_gmm(x1, ws1, mus1, covs1)
    ws2, mus2, covs2, ttl2 = train_gmm(x2, ws2, mus2, covs2)
    print('Total log-likelihood: %s for class X1; %s for class X2' % (ttl1, ttl2))
plt.show()

# Hard decision: assign x to class X1 if its log-joint probability beats that of class X2
hard_decision = lambda x: logpdf_gmm(x, ws1, mus1, covs1) + np.log(p1) > logpdf_gmm(x, ws2, mus2, covs2) + np.log(p2)
plot2dfun(hard_decision, ax, 500)
plt.plot(x1[:, 0], x1[:, 1], 'r.')
plt.plot(x2[:, 0], x2[:, 1], 'b.')
for w, m, c in zip(ws1, mus1, covs1):
    gellipse(m, c, 100, 'r', lw=round(w * 10))
for w, m, c in zip(ws2, mus2, covs2):
    gellipse(m, c, 100, 'b', lw=round(w * 10))

# Posterior probability of class X1, obtained from the log-odds via a sigmoid
plt.figure()
x1_posterior = lambda x: logistic_sigmoid(logpdf_gmm(x, ws1, mus1, covs1) + np.log(p1) - logpdf_gmm(x, ws2, mus2, covs2) - np.log(p2))
plot2dfun(x1_posterior, ax, 500)
plt.plot(x1[:, 0], x1[:, 1], 'r.')
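# A minimal sketch of the logistic_sigmoid helper used above, and why the
# sigmoid of the log-odds equals the class posterior: for
# a = log p(x|1)P(1) - log p(x|2)P(2),
# 1 / (1 + e^(-a)) = p(x|1)P(1) / (p(x|1)P(1) + p(x|2)P(2)) = P(1|x).
import numpy as np

def logistic_sigmoid_sketch(a):
    return 1.0 / (1.0 + np.exp(-a))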
# Initialize all variance vectors (diagonals of the full covariance matrices) to
# the same variance vector computed using all the data from the given class
COVs_m = [np.var(train_m, axis=0)] * M_m
# Use a uniform distribution as the initial guess for the weights
Ws_m = np.ones(M_m) / M_m

# Initialize parameters of the female model
M_f = 5
MUs_f = train_f[randint(1, len(train_f), M_f)]
COVs_f = [np.var(train_f, axis=0)] * M_f
Ws_f = np.ones(M_f) / M_f

# Run 30 iterations of the EM algorithm to train the two GMMs on the male and female data
for jj in range(30):
    Ws_m, MUs_m, COVs_m, TTL_m = train_gmm(train_m, Ws_m, MUs_m, COVs_m)
    Ws_f, MUs_f, COVs_f, TTL_f = train_gmm(train_f, Ws_f, MUs_f, COVs_f)
    print('Iteration:', jj, ' Total log-likelihood:', TTL_m, 'for males;', TTL_f, 'for females')

# Now run recognition for all male test utterances
score = []
for tst in test_m:
    ll_m = logpdf_gmm(tst, Ws_m, MUs_m, COVs_m)
    ll_f = logpdf_gmm(tst, Ws_f, MUs_f, COVs_f)
    score.append((sum(ll_m) + np.log(P_m)) - (sum(ll_f) + np.log(P_f)))
print(score)

# The same recognition loop for the female test utterances
score = []
for tst in test_f:
    ll_m = logpdf_gmm(tst, Ws_m, MUs_m, COVs_m)
    ll_f = logpdf_gmm(tst, Ws_f, MUs_f, COVs_f)
    score.append((sum(ll_m) + np.log(P_m)) - (sum(ll_f) + np.log(P_f)))
print(score)
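# A minimal sketch of the logpdf_gmm helper as assumed throughout: per-frame
# GMM log-likelihood via logsumexp over components (diagonal covariances);
# summing its output over frames gives the utterance log-likelihoods used above.
import numpy as np
from scipy.special import logsumexp

def logpdf_gmm_sketch(x, ws, mus, covs):
    return logsumexp([np.log(w)
                      - 0.5 * (np.sum(np.log(2 * np.pi * v))
                               + np.sum((x - m) ** 2 / v, axis=1))
                      for w, m, v in zip(ws, mus, covs)], axis=0)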
def main():
    check_dir(os.path.dirname(negative_test_path))
    check_dir(os.path.dirname(negative_train_path))
    check_dir(os.path.dirname(positive_test_path))
    check_dir(os.path.dirname(positive_train_path))

    train_m = list(wav16khz2mfcc(positive_train_path).values())
    train_f = list(wav16khz2mfcc(negative_train_path).values())
    test_m = list(wav16khz2mfcc(positive_test_path).values())
    test_f = list(wav16khz2mfcc(negative_test_path).values())
    train_m = np.vstack(train_m)
    train_f = np.vstack(train_f)
    dim = train_m.shape[1]

    # PCA: project onto the two eigenvectors of the total covariance matrix
    # with the largest eigenvalues
    cov_tot = np.cov(np.vstack([train_m, train_f]).T, bias=True)
    d, e = scipy.linalg.eigh(cov_tot, eigvals=(dim - 2, dim - 1))
    train_m_pca = train_m.dot(e)
    train_f_pca = train_f.dot(e)
    # Classes are not well separated in the 2D PCA subspace

    # LDA: within-class and across-class covariances; keep the single
    # generalized eigenvector with the largest eigenvalue
    n_m = len(train_m)
    n_f = len(train_f)
    cov_wc = (n_m * np.cov(train_m.T, bias=True) + n_f * np.cov(train_f.T, bias=True)) / (n_m + n_f)
    cov_ac = cov_tot - cov_wc
    d, e = scipy.linalg.eigh(cov_ac, cov_wc, eigvals=(dim - 1, dim - 1))

    # Let's define uniform a-priori probabilities of classes:
    P_m = 0.5
    P_f = 1 - P_m

    # Gaussian models with diagonal covariances: frame-by-frame posterior of
    # the male class for the first male test utterance
    ll_m = logpdf_gauss(test_m[0], np.mean(train_m, axis=0), np.var(train_m, axis=0))
    ll_f = logpdf_gauss(test_m[0], np.mean(train_f, axis=0), np.var(train_f, axis=0))
    posterior_m = np.exp(ll_m) * P_m / (np.exp(ll_m) * P_m + np.exp(ll_f) * P_f)

    # Gaussian models with full covariance matrices. '*' before 'train_gauss'
    # passes both return values (mean and cov) as parameters of 'logpdf_gauss'
    ll_m = logpdf_gauss(test_m[0], *train_gauss(train_m))
    ll_f = logpdf_gauss(test_m[0], *train_gauss(train_f))
    posterior_m = np.exp(ll_m) * P_m / (np.exp(ll_m) * P_m + np.exp(ll_f) * P_f)
    plt.figure()
    plt.plot(posterior_m, 'b')
    plt.plot(1 - posterior_m, 'r')
    plt.figure()
    plt.plot(ll_m, 'b')
    plt.plot(ll_f, 'r')

    # Again Gaussian models with full covariance matrices, now testing a female utterance
    ll_m = logpdf_gauss(test_f[1], *train_gauss(train_m))
    ll_f = logpdf_gauss(test_f[1], *train_gauss(train_f))
    posterior_m = np.exp(ll_m) * P_m / (np.exp(ll_m) * P_m + np.exp(ll_f) * P_f)
    plt.figure()
    plt.plot(posterior_m, 'b')
    plt.plot(1 - posterior_m, 'r')
    plt.figure()
    plt.plot(ll_m, 'b')
    plt.plot(ll_f, 'r')

    # Run recognition for all male test utterances with full-covariance Gaussians
    score = []
    mean_m, cov_m = train_gauss(train_m)
    mean_f, cov_f = train_gauss(train_f)
    for tst in test_m:
        ll_m = logpdf_gauss(tst, mean_m, cov_m)
        ll_f = logpdf_gauss(tst, mean_f, cov_f)
        score.append((sum(ll_m) + np.log(P_m)) - (sum(ll_f) + np.log(P_f)))

    # Run recognition with 1-dimensional LDA-projected data
    score = []
    mean_m, cov_m = train_gauss(train_m.dot(e))
    mean_f, cov_f = train_gauss(train_f.dot(e))
    for tst in test_m:
        ll_m = logpdf_gauss(tst.dot(e), mean_m, np.atleast_2d(cov_m))
        ll_f = logpdf_gauss(tst.dot(e), mean_f, np.atleast_2d(cov_f))
        score.append((sum(ll_m) + np.log(P_m)) - (sum(ll_f) + np.log(P_f)))

    # GMMs: initialize component means with random training frames, variances
    # with the global per-class variance vector, and uniform weights
    M_m = 12
    MUs_m = train_m[randint(1, len(train_m), M_m)]
    COVs_m = [np.var(train_m, axis=0)] * M_m
    Ws_m = np.ones(M_m) / M_m
    M_f = 7
    MUs_f = train_f[randint(1, len(train_f), M_f)]
    COVs_f = [np.var(train_f, axis=0)] * M_f
    Ws_f = np.ones(M_f) / M_f

    # Run 100 iterations of the EM algorithm to train the two GMMs
    for jj in range(100):
        Ws_m, MUs_m, COVs_m, TTL_m = train_gmm(train_m, Ws_m, MUs_m, COVs_m)
        Ws_f, MUs_f, COVs_f, TTL_f = train_gmm(train_f, Ws_f, MUs_f, COVs_f)

    # Score all male (target) test utterances and count correct decisions
    score = []
    testok = 0
    testnok = 0
    for tst in test_m:
        ll_m = logpdf_gmm(tst, Ws_m, MUs_m, COVs_m)
        ll_f = logpdf_gmm(tst, Ws_f, MUs_f, COVs_f)
        scr = (sum(ll_m) + np.log(P_m)) - (sum(ll_f) + np.log(P_f))
        score.append(scr)
        if scr >= 0:
            testok += 1
        else:
            testnok += 1
    print("target accuracy is " + str(testok / (testok + testnok)))
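    # For reference, a minimal sketch of the train_gauss helper assumed above:
    # maximum-likelihood mean and full covariance of one class ('*' unpacks the
    # returned (mean, cov) pair straight into logpdf_gauss). Defined inline
    # only as an illustration; the real helper comes from the library.
    def train_gauss_sketch(x):
        return np.mean(x, axis=0), np.cov(x.T, bias=True)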
    # Score all female (non-target) test utterances and count correct decisions
    score = []
    testok = 0
    testnok = 0
    for tst in test_f:
        ll_m = logpdf_gmm(tst, Ws_m, MUs_m, COVs_m)
        ll_f = logpdf_gmm(tst, Ws_f, MUs_f, COVs_f)
        scr = (sum(ll_m) + np.log(P_m)) - (sum(ll_f) + np.log(P_f))
        score.append(scr)
        if scr < 0:
            testok += 1
        else:
            testnok += 1
    print("non-target accuracy is " + str(testok / (testok + testnok)))

    # Save the trained model parameters for later use
    print('Saved as "GMM_model.pkl"')
    with open('GMM_model.pkl', 'wb') as f:
        pickle.dump([Ws_m, MUs_m, COVs_m, Ws_f, MUs_f, COVs_f], f)
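# A hypothetical counterpart for later use: loading the saved parameters back
# in the same order they were dumped above.
import pickle

with open('GMM_model.pkl', 'rb') as f:
    Ws_m, MUs_m, COVs_m, Ws_f, MUs_f, COVs_f = pickle.load(f)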
# Initialize per-class GMM parameters; component means are random training frames
mus = [None] * len(train_sample)
covs = [None] * len(train_sample)
ws = [None] * len(train_sample)
for index in range(len(train_sample)):
    mus[index] = train_sample[index][randint(1, len(train_sample[index]), m)]
    # Initialize all covariance matrices to the same covariance matrix computed
    # using all the data from the given class
    covs[index] = [cov[index]] * m
    ws[index] = np.ones(m) / m

ttl = [None] * len(train_sample)
# Run 110 iterations of the EM algorithm to train the GMM models
for i in range(110):
    for index in range(len(train_sample)):
        ws[index], mus[index], covs[index], ttl[index] = train_gmm(
            train_sample[index], ws[index], mus[index], covs[index])

# Classify each test utterance to the class with the highest log-joint probability
ll_c = [None] * len(train_sample)
f = open("gmm_speech.txt", "w")
for test in range(len(real_test_sample)):
    for index in range(len(train_sample)):
        ll_c[index] = sum(logpdf_gmm(real_test_sample[test], ws[index],
                                     mus[index], covs[index])) + np.log(P_c[index])
    ll_index_max = np.argmax(ll_c)
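# A hypothetical sketch of how the per-class 'cov' list referenced in the
# initialization above could be built: one full covariance matrix estimated
# from all the training data of each class.
import numpy as np

def class_covariances(train_sample):
    return [np.cov(cls.T, bias=True) for cls in train_sample]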
COVs = []
for i in range(NUM_CLASSES):
    id = i + 1
    print("Loading data for class {}".format(id))
    train = np.vstack(wav16khz2mfcc("train/{}".format(id)).values())
    print("Training model for class {}".format(id))
    # Initialize a 32-component GMM for this class and run 15 EM iterations
    M.append(32)
    Ws.append(np.ones(M[i]) / M[i])
    MUs.append(train[np.random.randint(1, len(train), M[i])])
    COVs.append([np.var(train, axis=0)] * M[i])
    n = 15
    for iteration in range(n):
        [Ws[i], MUs[i], COVs[i], TTL] = train_gmm(train, Ws[i], MUs[i], COVs[i])
        print("Training iteration: {}/{}, total log-likelihood: {}".format(iteration + 1, n, TTL))

# Evaluate on the dev set: pick the class with the highest total log-likelihood
errors = 0
trials = 0
for i in range(NUM_CLASSES):
    id = i + 1
    test = list(wav16khz2mfcc("dev/{}".format(id)).values())
    for j, test_data in enumerate(test):
        log_lh = []
        for ii in range(NUM_CLASSES):
            log_lh.append(sum(logpdf_gmm(test_data, Ws[ii], MUs[ii], COVs[ii])))
        winning_class_ind = np.argmax(log_lh)
        print("Correct class {} | Winning class {} with value {}".format(
            i + 1, winning_class_ind + 1, log_lh[winning_class_ind]))
        trials += 1
        if winning_class_ind != i:
            errors += 1
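# A small hypothetical helper for the summary one would typically print after
# the evaluation loop above (the 'errors'/'trials' counters come from that loop).
def report_error_rate(errors, trials):
    print("Dev errors: {}/{} ({:.1f} %)".format(errors, trials,
                                                100.0 * errors / trials))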
# Target / non-target GMMs with uniform priors and 64 components each
P_t = 0.5
M_t = 64
MUs_t = t_train_sound[numpy.random.randint(1, len(t_train_sound), M_t)]
COVs_t = [numpy.var(t_train_sound, axis=0)] * M_t
Ws_t = numpy.ones(M_t) / M_t

P_nt = 1 - P_t
M_nt = M_t
MUs_nt = nt_train_sound[numpy.random.randint(1, len(nt_train_sound), M_nt)]
COVs_nt = [numpy.var(nt_train_sound, axis=0)] * M_nt
Ws_nt = numpy.ones(M_nt) / M_nt

# Run 32 iterations of the EM algorithm to train both GMMs
n = 32
for i in range(n):
    [Ws_t, MUs_t, COVs_t, TTL_t] = train_gmm(t_train_sound, Ws_t, MUs_t, COVs_t)
    [Ws_nt, MUs_nt, COVs_nt, TTL_nt] = train_gmm(nt_train_sound, Ws_nt, MUs_nt, COVs_nt)
    print("Training iteration: " + str(i + 1) + "/" + str(n))

# Write one line per evaluation file: name, log-odds score and hard decision
with open("GMM_speech_results", "w") as f:
    for i, eval in enumerate(eval_sound):
        ll_t = logpdf_gmm(eval, Ws_t, MUs_t, COVs_t)
        ll_nt = logpdf_gmm(eval, Ws_nt, MUs_nt, COVs_nt)
        val = (sum(ll_t) + numpy.log(P_t)) - (sum(ll_nt) + numpy.log(P_nt))
        f.write(eval_sound_names[i] + " " + str(val) + " " + ("1" if val > 0 else "0") + "\n")
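# A minimal follow-up sketch: the log-odds score 'val' written above can be
# mapped to a posterior probability of the target class with a sigmoid
# (this conversion is an addition, not part of the original output format).
import numpy

def target_posterior(val):
    # val = (log p(x|t) + log P_t) - (log p(x|nt) + log P_nt)
    return 1.0 / (1.0 + numpy.exp(-val))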