Example #1
import numpy as np
import matplotlib.pyplot as plt

from lab2_proto import *
from lab2_tools import *
from prondict import prondict

data = np.load('lab2_data.npz', allow_pickle=True)['data']

# The .npz entries are pickled Python objects, so allow_pickle=True and
# .item() are needed to unpack them.
example = np.load('lab2_example.npz', allow_pickle=True)['example'].item()

isolated = {}
for digit in prondict.keys():
    isolated[digit] = ['sil'] + prondict[digit] + ['sil']

phoneHMMs = np.load('lab2_models_onespkr.npz',
                    allow_pickle=True)['phoneHMMs'].item()

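# concatHMMs is expected to chain the phoneme HMMs left to right, yielding a
# word-level model whose startprob, transmat, means and covars cover every
# state of the 'sil' + phones + 'sil' sequence.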
wordHMMs = {}
for key in isolated.keys():
    wordHMMs[key] = concatHMMs(phoneHMMs, isolated[key])

# Correctness test for log_multivariate_normal_density_diag(X, means, covars);
# flip the guard to True to run it.
if False:
    result = log_multivariate_normal_density_diag(example["lmfcc"],
                                                  wordHMMs['o']["means"],
                                                  wordHMMs['o']["covars"])
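
# For reference, a minimal sketch of what log_multivariate_normal_density_diag
# is assumed to compute (sklearn-style convention: X is (N, D), means and
# covars are (M, D), output is (N, M)); this is an illustrative sketch, not
# the lab2_tools implementation:
def _log_gauss_diag_sketch(X, means, covars):
    D = X.shape[1]
    diff = X[:, None, :] - means[None, :, :]            # (N, M, D)
    return -0.5 * (D * np.log(2 * np.pi)
                   + np.sum(np.log(covars), axis=1)     # (M,)
                   + np.sum(diff ** 2 / covars, axis=2))

# With the guard above enabled, the result can be checked against the
# reference stored in lab2_example.npz:
# assert np.allclose(result, example['obsloglik'])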
Example #2
import numpy as np
import matplotlib.pyplot as plt
from scipy.special import logsumexp  # fallback; lab2_tools may provide its own
from tqdm import tqdm

from lab2_proto import *
from lab2_tools import *
from prondict import prondict


def main():
    np.seterr(divide='ignore')  # Suppress divide by zero warning
    example = np.load('lab2_example.npz', allow_pickle=True)['example'].item()
    phone_hmms = np.load('lab2_models_all.npz',
                         allow_pickle=True)['phoneHMMs'].item()

    # Concatenate all digit hmms
    word_hmms = {}
    for digit in prondict.keys():
        word_hmms[digit] = concatHMMs(phone_hmms,
                                      ['sil'] + prondict[digit] + ['sil'])

    data = np.load('lab2_data.npz', allow_pickle=True)['data']

    best_model = {}
    acc_count = 0
    print("Running Forward algorithm...")
    for idx, dt in tqdm(enumerate(data), total=len(data)):  # Iterate over data samples
        maxloglik = None
        for digit in word_hmms.keys():  # Iterate over hmms
            obsloglik = log_multivariate_normal_density_diag(
                dt['lmfcc'], word_hmms[digit]['means'],
                word_hmms[digit]['covars'])
            logalpha = forward(obsloglik,
                               np.log(word_hmms[digit]['startprob']),
                               np.log(word_hmms[digit]['transmat']))
            loglik = logsumexp(logalpha[-1])
            if maxloglik is None or maxloglik < loglik:  # If better likelihood found
                best_model[idx] = digit  # Set most probable model
                maxloglik = loglik  # Update max log likelihood
        if dt['digit'] == best_model[idx]:
            acc_count += 1
        # print("The best model for utterance " + str(idx) + " was hmm: " + str(best_model[idx]))
        # print("The real digit of utterance " + str(idx) + " was digit: " + str(dt['digit']) + "\n")
    print("The accuracy of the predictions has been: " +
          str(np.round(acc_count / len(data) * 100, 2)) + "%")
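    # forward() is assumed to implement the standard log-domain recursion:
    # logalpha[0] = log_pi + log_b(x_0), and for n > 0
    # logalpha[n, j] = logsumexp_i(logalpha[n-1, i] + log_a[i, j]) + log_b_j(x_n),
    # so logsumexp(logalpha[-1]) above is the full-sequence log-likelihood.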

    logalpha = forward(example['obsloglik'],
                       np.log(word_hmms['o']['startprob']),
                       np.log(word_hmms['o']['transmat']))
    vloglik, vpath = viterbi(example['obsloglik'],
                             np.log(word_hmms['o']['startprob']),
                             np.log(word_hmms['o']['transmat']))
    plt.pcolormesh(logalpha.T)
    plt.plot(vpath.T, color="red")
    plt.show()

    best_model = {}
    acc_count = 0
    print("Running Viterbi algorithm...")
    for idx, dt in tqdm(enumerate(data), total=len(data)):  # Iterate over data samples
        maxloglik = None
        for digit in word_hmms.keys():  # Iterate over hmms
            obsloglik = log_multivariate_normal_density_diag(
                dt['lmfcc'], word_hmms[digit]['means'],
                word_hmms[digit]['covars'])
            vloglik, vpath = viterbi(obsloglik,
                                     np.log(word_hmms[digit]['startprob']),
                                     np.log(word_hmms[digit]['transmat']))
            if maxloglik is None or maxloglik < vloglik:  # If better likelihood found
                best_model[idx] = digit  # Set most probable model
                maxloglik = vloglik  # Update max log likelihood
        if dt['digit'] == best_model[idx]:
            acc_count += 1
        # print("The best model for utterance " + str(idx) + " was hmm: " + str(best_model[idx]))
        # print("The real digit of utterance " + str(idx) + " was digit: " + str(dt['digit']) + "\n")
    print("The accuracy of the predictions has been: " +
          str(np.round(acc_count / len(data) * 100, 2)) + "%")
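    # viterbi() is assumed to return (best-path log probability, state
    # sequence), replacing the sum in the forward recursion with a max and
    # backtracking through the argmax pointers.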

    # Baum-Welch algorithm
    best_loglik = None
    best_model = None
    for digit in word_hmms.keys():  # Iterate over hmms
        print("Trying model " + str(digit))
        means = word_hmms[digit]['means']
        covars = word_hmms[digit]['covars']
        obsloglik = log_multivariate_normal_density_diag(
            data[10]['lmfcc'], means, covars)
        vloglik = 0
        newloglik = viterbi(obsloglik, np.log(word_hmms[digit]['startprob']),
                            np.log(word_hmms[digit]['transmat']))[0]
        it = 0
        while it < 20 and abs(newloglik - vloglik) > 1.0:
            vloglik = newloglik  # Update value of log-likelihood
            forward_prob = forward(obsloglik,
                                   np.log(word_hmms[digit]['startprob']),
                                   np.log(word_hmms[digit]['transmat']))
            backward_prob = backward(obsloglik,
                                     np.log(word_hmms[digit]['startprob']),
                                     np.log(word_hmms[digit]['transmat']))
            log_gamma = statePosteriors(forward_prob, backward_prob)
            means, covars = updateMeanAndVar(data[10]['lmfcc'], log_gamma)
            obsloglik = log_multivariate_normal_density_diag(
                data[10]['lmfcc'], means, covars)
            newloglik = viterbi(obsloglik,
                                np.log(word_hmms[digit]['startprob']),
                                np.log(word_hmms[digit]['transmat']))[0]
            it += 1  # Update number of iterations
        print("Log-likelihood: " + str(newloglik) +
              ". Number of iterations until convergence: " + str(it))
        if best_loglik is None or newloglik > best_loglik:
            best_loglik = newloglik
            best_model = digit
    print("The best log-likelihood is: " + str(best_loglik) +
          ". It corresponds to the model " + str(best_model))
Example #3
import numpy as np
import matplotlib.pyplot as plt
from scipy.special import logsumexp  # fallback; lab2_tools may provide its own
from sklearn import mixture

from lab2_proto import *
from lab2_tools import *
from prondict import prondict


def maintask(task):
    data = np.load('lab2_data.npz', allow_pickle=True)['data']
    phoneHMMs = np.load('lab2_models_onespkr.npz',
                        allow_pickle=True)['phoneHMMs'].item()
    phoneHMMs_all = np.load('lab2_models_all.npz',
                            allow_pickle=True)['phoneHMMs'].item()
    if task == '4':
        hmm1 = phoneHMMs['ah']
        hmm2 = phoneHMMs['ao']
        twohmm = concatTwoHMMs(hmm1, hmm2)
    """5 HMM Likelihood and Recognition"""
    example = np.load('lab2_example.npz',
                      allow_pickle=True)['example'].item()
    isolated = {}
    for digit in prondict.keys():
        isolated[digit] = ['sil'] + prondict[digit] + ['sil']
    wordHMMs = {}
    wordHMMs_all = {}
    for digit in prondict.keys():
        wordHMMs[digit] = concatHMMs(phoneHMMs, isolated[digit])
    # for 11 digits
    for digit in prondict.keys():
        wordHMMs_all[digit] = concatHMMs(phoneHMMs_all, isolated[digit])
    # example
    lpr = log_multivariate_normal_density_diag(example['lmfcc'],
                                               wordHMMs['o']['means'],
                                               wordHMMs['o']['covars'])
    diff = example['obsloglik'] - lpr  # should be ~zero
    # another utterance of the same digit 'o' (data[22])
    lpr_o = log_multivariate_normal_density_diag(data[22]['lmfcc'],
                                                 wordHMMs_all['o']['means'],
                                                 wordHMMs_all['o']['covars'])
    if task == '5.1':
        plt.figure()
        plt.subplot(2, 1, 1)
        plt.pcolormesh(lpr.T)
        plt.title('example "o" ')
        plt.colorbar()
        plt.subplot(2, 1, 2)
        plt.pcolormesh(lpr_o.T)
        plt.title('test "o" from data22')
        plt.colorbar()
        plt.show()
    """
    5.2
    """
    lalpha = forward(lpr, np.log(wordHMMs['o']['startprob']),
                     np.log(wordHMMs['o']['transmat']))
    diff1 = example['logalpha'] - lalpha  # should be ~zero
    # log-likelihood of the whole utterance
    loglike = logsumexp(lalpha[-1])
    diff0 = example['loglik'] - loglike  # should be ~zero

    # score all the 44 utterances in the data array with each of the 11 HMM
    # models in wordHMMs.
    scores_1 = np.zeros((44, 11))
    scores_2 = np.zeros((44, 11))
    labels_ori = []
    labels_pre = []
    labels_pre2 = []
    keys = list(prondict.keys())
    acc_1 = 0
    acc_2 = 0
    if task == '5.2':
        for i in range(44):
            for j, key in enumerate(keys):
                lpr = log_multivariate_normal_density_diag(
                    data[i]['lmfcc'], wordHMMs_all[key]['means'],
                    wordHMMs_all[key]['covars'])
                alpha = forward(lpr, np.log(wordHMMs_all[key]['startprob']),
                                np.log(wordHMMs_all[key]['transmat']))
                scores_2[i, j] = logsumexp(alpha[-1])
                lpr_1 = log_multivariate_normal_density_diag(
                    data[i]['lmfcc'], wordHMMs[key]['means'],
                    wordHMMs[key]['covars'])
                alpha_1 = forward(lpr_1, np.log(wordHMMs[key]['startprob']),
                                  np.log(wordHMMs[key]['transmat']))
                scores_1[i, j] = logsumexp(alpha_1[-1])
            ori = data[i]['digit']
            pre_1 = keys[int(np.argmax(scores_1[i, :]))]
            pre_2 = keys[int(np.argmax(scores_2[i, :]))]
            #labels_ori.append(ori)
            labels_pre.append(pre_1)
            labels_pre2.append(pre_2)
            if ori == pre_1:
                acc_1 += 1
            if ori == pre_2:
                acc_2 += 1
        print("Correct with models trained on all speakers: {0}/44; "
              "with models trained on one speaker: {1}/44".format(acc_2, acc_1))
        print(labels_pre, labels_pre2)
    """
    5.3 Viterbi
    """
    viterbi_loglik, viterbi_path = viterbi(lpr,
                                           np.log(wordHMMs['o']['startprob']),
                                           np.log(wordHMMs['o']['transmat']))

    if task == '5.3':
        plt.pcolormesh(lalpha.T)
        plt.plot(viterbi_path, 'r')
        plt.title(
            'alpha array overlaid with the best path from Viterbi decoding')
        plt.colorbar()
        plt.show()
        diff3 = example['vloglik'] - viterbi_loglik  # should be ~zero

        # Score all 44 utterances in the data with each of the 11 HMM models in wordHMMs
        for i in range(44):
            for j, key in enumerate(keys):
                lpr = log_multivariate_normal_density_diag(
                    data[i]['lmfcc'], wordHMMs_all[key]['means'],
                    wordHMMs_all[key]['covars'])
                viterbi_2, viterbi_path_2 = viterbi(
                    lpr, np.log(wordHMMs_all[key]['startprob']),
                    np.log(wordHMMs_all[key]['transmat']))
                scores_2[i, j] = viterbi_2
                lpr_1 = log_multivariate_normal_density_diag(
                    data[i]['lmfcc'], wordHMMs[key]['means'],
                    wordHMMs[key]['covars'])
                viterbi_1, viterbi_path_1 = viterbi(
                    lpr_1, np.log(wordHMMs[key]['startprob']),
                    np.log(wordHMMs[key]['transmat']))
                scores_1[i, j] = viterbi_1
            ori = data[i]['digit']
            pre_1 = keys[int(np.argmax(scores_1[i, :]))]
            pre_2 = keys[int(np.argmax(scores_2[i, :]))]
            #labels_ori.append(ori)
            labels_pre.append(pre_1)
            labels_pre2.append(pre_2)
            if ori == pre_1:
                acc_1 += 1
            if ori == pre_2:
                acc_2 += 1
        print("Correct with models trained on all speakers: {0}/44; "
              "with models trained on one speaker: {1}/44".format(acc_2, acc_1))
        print(labels_pre, labels_pre2)
    """
    5.4
    """
    lbeta = backward(lpr, np.log(wordHMMs['o']['startprob']),
                     np.log(wordHMMs['o']['transmat']))
    diff2 = example['logbeta'] - lbeta  # should be ~zero
    # log-likelihood from beta: combine the initial distribution, the first
    # frame's emission log-likelihoods and beta at n=0 (startprob may carry a
    # non-emitting final state, hence the slicing)
    log_pi = np.log(wordHMMs['o']['startprob'][:lbeta.shape[1]])
    loglike = logsumexp(log_pi + lpr[0] + lbeta[0])
    diff4 = example['loglik'] - loglike  # should be ~zero
    if task == '5.4':
        plt.figure()
        plt.subplot(1, 3, 1)
        plt.pcolormesh(lbeta)
        plt.title('log-beta')
        plt.subplot(1, 3, 2)
        plt.pcolormesh(example['logbeta'])
        plt.title('example')
        plt.subplot(1, 3, 3)
        plt.pcolormesh(example['logalpha'])
        plt.title('log-alpha')
        plt.show()
    """6 HMM Retraining(emission probability distributions)"""
    """
    6.1
    """
    lgamma = statePosteriors(lalpha, lbeta)
    N = lgamma.shape[0]
    K = 9  # number of GMM components
    lgamma_gmm = np.zeros((N, K))
    total = log_multivariate_normal_density_diag(example['lmfcc'],
                                                 wordHMMs['o']['means'],
                                                 wordHMMs['o']['covars'])
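    # gamma_n(i) = alpha_n(i) * beta_n(i) / P(X); in the log domain,
    # lgamma = lalpha + lbeta - logsumexp(lalpha[-1]), so each row of
    # exp(lgamma) sums to one, which the checks below verify.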
    if task == '6.1':
        print('HMM posteriors')
        print('each time step sums to one over states:',
              np.sum(np.exp(lgamma), axis=1))
        print('state occupancy, summed over time and divided by N:',
              np.sum(np.exp(lgamma), axis=0) / N)
        print('sum over both states and time steps:',
              np.sum(np.exp(lgamma)))  # equals the number of frames N
        print('length of observation sequence:', lalpha.shape[0])
        print('GMM posteriors')
        # (abandoned manual attempt at GMM posteriors, kept for reference)
        # for k in range(K):
        #     lgamma_gmm[:, k] = 1 / K * total[:, k] / np.sum(total[:, k])
        gmm = mixture.GaussianMixture(n_components=9)
        gmm.fit(example['lmfcc'])
        gmm_post = gmm.predict_proba(example['lmfcc'])
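        # predict_proba returns per-frame component responsibilities of shape
        # (N, n_components); unlike the HMM posteriors, they carry no temporal
        # transition constraints, so each frame is scored independently.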
        plt.subplot(2, 1, 1)
        plt.pcolormesh(gmm_post.T)
        plt.title('GMM posteriors')
        plt.colorbar()
        plt.subplot(2, 1, 2)
        plt.pcolormesh(lgamma.T)
        plt.title('HMM posteriors')
        plt.colorbar()
        plt.show()
    """6.2"""
    if task == '6.2':
        plt.figure()
        L = {}
        for d in prondict:
            # initialization
            log_pi = np.log(wordHMMs_all[d]['startprob'])
            log_tr = np.log(wordHMMs_all[d]['transmat'])
            means = wordHMMs_all[d]['means']
            covars = wordHMMs_all[d]['covars']
            l = []
            # EM iterations (capped at 20):
            for i in range(20):
                lpr = log_multivariate_normal_density_diag(
                    data[10]['lmfcc'], means, covars)
                # Expectation
                lalpha = forward(lpr, log_pi, log_tr)
                lbeta = backward(lpr, log_pi, log_tr)
                log_gamma = statePosteriors(lalpha, lbeta)
                # Maximization
                means, covars = updateMeanAndVar(data[10]['lmfcc'], log_gamma)
                # Estimate likelihood
                log_like = logsumexp(lalpha[-1])
                l.append(log_like)
                L[d] = l
                # stop once the improvement drops below 0.1
                if i > 2 and l[-1] - l[-2] < 0.1:
                    break

            plt.plot(l, label=d)

        plt.legend()
        plt.title('log-likelihood (data[10] with different wordHMMs)')
        plt.show()
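
# For reference, a minimal log-domain Viterbi sketch matching the calls above
# (same (N, M) and [:M] conventions as the forward/backward sketches after
# Example #2; an illustrative sketch, not the graded implementation):
def viterbi_sketch(log_emlik, log_startprob, log_transmat):
    N, M = log_emlik.shape
    V = np.zeros((N, M))             # best-path log scores
    B = np.zeros((N, M), dtype=int)  # backpointers
    V[0] = log_startprob[:M] + log_emlik[0]
    for n in range(1, N):
        scores = V[n - 1][:, None] + log_transmat[:M, :M]
        B[n] = np.argmax(scores, axis=0)
        V[n] = np.max(scores, axis=0) + log_emlik[n]
    path = np.empty(N, dtype=int)
    path[-1] = int(np.argmax(V[-1]))
    for n in range(N - 2, -1, -1):
        path[n] = B[n + 1, path[n + 1]]
    return np.max(V[-1]), path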