def statePosteriors(log_alpha, log_beta):
    """State posteriors (gamma) in log domain.

    Args:
        log_alpha: NxM array of forward log probabilities
        log_beta: NxM array of backward log probabilities
    Output:
        log_gamma: NxM array of state posterior log probabilities
    """
    N = len(log_alpha)
    M = len(log_alpha[0])
    log_gamma = [[0 for j in range(M)] for i in range(N)]
    # The data log likelihood is the marginal over the last forward column
    log_lik = tools2.logsumexp(log_alpha[N - 1])
    for n in range(N):
        for j in range(M):
            log_gamma[n][j] = log_alpha[n][j] + log_beta[n][j] - log_lik
    return log_gamma
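# Hedged sanity-check sketch (not part of the original lab code): the state
# posteriors must form a proper distribution at every frame, so
# exponentiating each row of log_gamma and summing over states should give 1.
# Assumes log_alpha/log_beta come from the forward()/backward() passes below
# and that np and tools2 are in scope as elsewhere in this file.
def check_state_posteriors(log_alpha, log_beta):
    log_gamma = np.array(statePosteriors(log_alpha, log_beta))
    row_sums = np.exp(log_gamma).sum(axis=1)  # should all be ~1.0
    return np.allclose(row_sums, 1.0)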
def gmmloglik(log_emlik, weights):
    """Log Likelihood for a GMM model based on Multivariate Normal Distribution.

    Args:
        log_emlik: array like, shape (N, K). contains the log likelihoods
            for each of N observations and each of K distributions
        weights: weight vector for the K components in the mixture

    Output:
        gmmloglik: scalar, log likelihood of data given the GMM model.
    """
    log_lik_gmm = 0
    for obs in range(len(log_emlik)):
        log_lik_gmm += tools2.logsumexp(log_emlik[obs, :] + np.log(weights))
    return log_lik_gmm
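# Hedged usage sketch: scoring one utterance against a single GMM. The names
# `lmfcc` (an (N, D) feature array) and `gmm` (a dict with 'means', 'covars'
# and 'weights', shaped like the models used further down) are hypothetical
# placeholders, not variables from the original code.
log_emlik = tools2.log_multivariate_normal_density_diag(
    lmfcc, gmm['means'], gmm['covars'])
score = gmmloglik(log_emlik, gmm['weights'])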
def forward(log_emlik, log_startprob, log_transmat):
    N = len(log_emlik)
    M = len(log_emlik[0])
    logAlpha = [[0 for x in range(M)] for y in range(N)]
    # Initialization: log alpha_0(j) = log pi_j + log b_j(x_0)
    for j in range(M):
        logAlpha[0][j] = log_startprob[j] + log_emlik[0][j]
    # Recursion: log alpha_n(j) = log b_j(x_n)
    #                             + logsumexp_i(log alpha_{n-1}(i) + log a_ij)
    for n in range(1, N):
        for j in range(M):
            logAlpha[n][j] = log_emlik[n][j]
            # building the array of the log sum over previous states
            sumArray = []
            for i in range(M):
                sumArray += [logAlpha[n - 1][i] + log_transmat[i][j]]
            logAlpha[n][j] += tools2.logsumexp(np.array(sumArray))
    return logAlpha
def backward(log_emlik, log_startprob, log_transmat):
    N = len(log_emlik)
    M = len(log_emlik[0])
    logBeta = [[0 for x in range(M)] for y in range(N)]
    # Initialization: beta_{N-1}(i) = 1, i.e. 0 in log domain
    for i in range(M):
        logBeta[N - 1][i] = 0
    # Recursion (backwards in time):
    # log beta_n(i) = logsumexp_j(log a_ij + log b_j(x_{n+1})
    #                             + log beta_{n+1}(j))
    for n in range(N - 2, -1, -1):
        for i in range(M):
            # building the array of the log sum over next states
            sumArray = []
            for j in range(M):
                sumArray += [log_transmat[i][j] + log_emlik[n + 1][j]
                             + logBeta[n + 1][j]]
            logBeta[n][i] = tools2.logsumexp(np.array(sumArray))
    return logBeta
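# Hedged consistency-check sketch (an addition, not original lab code): for
# every frame n, logsumexp(log_alpha[n] + log_beta[n]) should equal the total
# data log likelihood logsumexp(log_alpha[N-1]), because
# alpha_n(i) * beta_n(i) = P(X, s_n = i). Handy for verifying the two passes
# against each other.
def check_forward_backward(log_emlik, log_startprob, log_transmat):
    log_alpha = np.array(forward(log_emlik, log_startprob, log_transmat))
    log_beta = np.array(backward(log_emlik, log_startprob, log_transmat))
    total = tools2.logsumexp(log_alpha[-1])
    per_frame = [tools2.logsumexp(log_alpha[n] + log_beta[n])
                 for n in range(len(log_emlik))]
    return np.allclose(per_frame, total)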
def forward(log_emlik, log_startprob, log_transmat):
    """Forward probabilities in log domain.

    Args:
        log_emlik: NxM array of emission log likelihoods, N frames, M states
        log_startprob: log probability to start in state i
        log_transmat: log transition probability from state i to j

    Output:
        forward_prob: NxM array of forward log probabilities for each of the
            M states in the model
    """
    log_a = np.zeros((log_emlik.shape[0], log_transmat.shape[0]))
    for j in range(log_transmat.shape[0]):
        log_a[0, j] = log_startprob[j] + log_emlik[0, j]
    for i in range(1, log_emlik.shape[0]):
        for j in range(log_transmat.shape[0]):
            log_a[i, j] = tools2.logsumexp(
                log_a[i - 1, :] + log_transmat[:, j]) + log_emlik[i, j]
    return log_a
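# Hedged sketch of the log-sum-exp trick that forward()/backward() rely on.
# tools2.logsumexp is assumed to behave roughly like this: shifting by the
# max before exponentiating avoids underflow when all log values are very
# negative, as emission log likelihoods typically are.
def logsumexp_sketch(x):
    x = np.asarray(x)
    m = np.max(x)
    return m + np.log(np.sum(np.exp(x - m)))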
# Try every GMM model on each utterance (tidigits) and pick the best
gmm_class = []
for i in range(len(tidigits)):
    loglik = np.zeros(len(models))
    for j in range(len(models)):
        model = skm.log_multivariate_normal_density(
            tidigits[i]['mfcc'],
            models[j]['gmm']['means'],
            models[j]['gmm']['covars'],
            'diag')
        loglik[j] = proto2.gmmloglik(model, models[j]['gmm']['weights'])
    gmm_class.append(loglik)
gmm_class = np.array(gmm_class)
# Find the highest-scoring model for each utterance
gmm_ret_labels = np.argmax(gmm_class, axis=1)

# 6: HMM Likelihood and Recognition
log_alpha = proto2.forward(hmm_obsloglik,
                           np.log(models[0]['hmm']['startprob']),
                           np.log(models[0]['hmm']['transmat']))
# Marginalize over the states at the last frame:
hmm_loglik = tools2.logsumexp(log_alpha[-1, :])

# Try every model on each utterance (tidigits) and find the best --> conclude
hmm_class = []
# For each utterance
for i in range(len(tidigits)):
    loglik = np.zeros(len(models))
    for j in range(len(models)):
        model = skm.log_multivariate_normal_density(
            tidigits[i]['mfcc'],
            models[j]['hmm']['means'],
            models[j]['hmm']['covars'],
            'diag')
        log_alpha_class = proto2.forward(
            model,
            np.log(models[j]['hmm']['startprob']),
            np.log(models[j]['hmm']['transmat']))
        loglik[j] = tools2.logsumexp(log_alpha_class[-1, :])
    hmm_class.append(loglik)
hmm_class = np.array(hmm_class)
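# Hedged follow-up sketch: picking the best HMM per utterance and measuring
# accuracy, mirroring the GMM branch above. Assumes each tidigits entry and
# each model carries a 'digit' key, as used in quest_6_1() below.
hmm_ret_labels = np.argmax(hmm_class, axis=1)
correct = sum(1 for i in range(len(tidigits))
              if models[hmm_ret_labels[i]]['digit'] == tidigits[i]['digit'])
print("HMM accuracy: %.2f%%" % (100.0 * correct / len(tidigits)))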
def gmmloglik(logAlpha):
    # HMM data log likelihood: marginalize the forward log probabilities
    # over the states at the last time step
    return tools2.logsumexp(np.array(logAlpha[-1]))
def quest_6_1(flag):
    # --- Quest 6.1: ---->>> Forward Algorithm
    # --->>> check
    hmm_log_emlik = tools2.log_multivariate_normal_density_diag(
        X, models[0]['hmm']['means'], models[0]['hmm']['covars'])
    log_a = forward(hmm_log_emlik,
                    np.log(models[0]['hmm']['startprob']),
                    np.log(models[0]['hmm']['transmat']))
    check_log_a = example['hmm_logalpha']
    # print("log_a =", log_a)
    # print("check_log_a =", check_log_a)

    fig3 = plt.figure()
    plt.subplot(2, 1, 1)
    plt.imshow(log_a.T, cmap='jet')
    plt.title('log_a')
    plt.xticks([], [])
    plt.gca().invert_yaxis()
    plt.subplot(2, 1, 2)
    plt.imshow(example['hmm_logalpha'].T, cmap='jet')
    plt.title('check log_a')
    plt.xticks([], [])
    plt.gca().invert_yaxis()
    plt.savefig("../figs/quest_6_1.png", bbox_inches='tight')
    # plt.show()

    # Convert the formula you have derived into log domain
    log_lik_a = tools2.logsumexp(log_a[-1, :])
    check_log_lik_a = example['hmm_loglik']
    # compare floats with a tolerance instead of ==
    if np.isclose(log_lik_a, check_log_lik_a):
        print('True')

    if flag == 'HMM':
        utters = len(tidigits)
        models_len = len(models)
        log_lik_a = np.zeros((utters, models_len))
        for utter in range(utters):
            for digit in range(models_len):
                hmm_log_emlik = tools2.log_multivariate_normal_density_diag(
                    tidigits[utter]['mfcc'],
                    models[digit]['hmm']['means'],
                    models[digit]['hmm']['covars'])
                log_a = forward(hmm_log_emlik,
                                np.log(models[digit]['hmm']['startprob']),
                                np.log(models[digit]['hmm']['transmat']))
                log_lik_a[utter, digit] = tools2.logsumexp(log_a[-1, :])

        # ---->>> Check for misrecognized utterances
        miss = 0
        print("------ HMM a-pass ------")
        for utter in range(utters):
            best_score = np.argmax(log_lik_a[utter, :])
            print('tid digit, mod digit: ---> '
                  + str(tidigits[utter]['digit']) + " - "
                  + str(models[best_score]['digit']))  # Uncomment to see the results analytically!
            if models[best_score]['digit'] != tidigits[utter]['digit']:
                miss += 1
        accuracy = ((utters - miss) / utters) * 100
        print()
        print("Misrecognized %d out of %d utterances." % (miss, len(tidigits)))
        print("Accuracy = " + str("%.2f" % round(accuracy, 2)) + '%')
        print()

    elif flag == "HMM as GMM":
        utters = len(tidigits)
        models_len = len(models)
        log_like_hmm = np.zeros((utters, models_len))
        for utter in range(utters):
            for digit in range(models_len):
                hmm_log_emlik = tools2.log_multivariate_normal_density_diag(
                    tidigits[utter]['mfcc'],
                    models[digit]['hmm']['means'],
                    models[digit]['hmm']['covars'])
                # treat the HMM emission models as a GMM with uniform weights
                weights_hmm = np.ones(
                    models[digit]['hmm']['startprob'].shape[0]
                ) / models[digit]['hmm']['startprob'].shape[0]
                log_like_hmm[utter, digit] = gmmloglik(hmm_log_emlik,
                                                       weights_hmm)

        # ---->>> Check for misrecognized utterances
        miss = 0
        print("------ HMM as GMM ------")
        for utter in range(utters):
            best_score = np.argmax(log_like_hmm[utter, :])
            print('tid digit, mod digit: ---> '
                  + str(tidigits[utter]['digit']) + " - "
                  + str(models[best_score]['digit']))  # Uncomment to see the results analytically!
            if models[best_score]['digit'] != tidigits[utter]['digit']:
                miss += 1
        accuracy = ((utters - miss) / utters) * 100
        print()
        print("Misrecognized %d out of %d utterances." % (miss, len(tidigits)))
        print("Accuracy = " + str("%.2f" % round(accuracy, 2)) + '%')
        print()
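# Hedged usage sketch: running both recognition variants handled by
# quest_6_1(). The flags are the two branches above; the globals it reads
# (X, example, tidigits, models) are assumed to be loaded beforehand as in
# the rest of the lab.
if __name__ == "__main__":
    quest_6_1('HMM')           # forward-pass (alpha) recognition
    quest_6_1('HMM as GMM')    # ignore transitions, uniform-weight mixture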