def trainGMMs(trainClasses, gaussWeights, meanValsGauss, covarMatrices):
    print("STEP3: Training all train data using GMM.")
    # In each iteration, we have to make one training step for each training class
    for iteration in range(TRAINING_ITERATIONS):
        print("Training iteration: ", iteration)
        # Calculate new parameter values for every class
        for singleClass in trainClasses:
            gaussWeights[singleClass], meanValsGauss[singleClass], covarMatrices[singleClass], TTL = train_gmm(
                trainClasses[singleClass], gaussWeights[singleClass],
                meanValsGauss[singleClass], covarMatrices[singleClass])
    print("STEP3 Done: Training finished.\n")
    return gaussWeights, meanValsGauss, covarMatrices
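# For reference, a minimal sketch of the single EM step that train_gmm is
# assumed to perform here (ikrlib-style signature, diagonal covariances);
# the real implementation comes from the helper library, this is not it.
import numpy as np
from scipy.special import logsumexp

def train_gmm_sketch(x, ws, mus, covs):
    # E-step: per-frame, per-component log p(x, c) under the current model
    log_joint = np.array([np.log(w)
                          - 0.5 * (np.sum(np.log(2 * np.pi * v))
                                   + np.sum((x - m) ** 2 / v, axis=1))
                          for w, m, v in zip(ws, mus, covs)]).T
    frame_ll = logsumexp(log_joint, axis=1)        # per-frame log-likelihood
    gamma = np.exp(log_joint - frame_ll[:, None])  # responsibilities
    # M-step: re-estimate weights, means and variances from the soft counts
    counts = gamma.sum(axis=0)
    ws_new = counts / len(x)
    mus_new = gamma.T.dot(x) / counts[:, None]
    covs_new = [gamma[:, c].dot((x - mus_new[c]) ** 2) / counts[c]
                for c in range(len(ws))]
    return ws_new, mus_new, covs_new, frame_ll.sum()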
ws1 = np.ones(m1) / m1

m2 = 2
mus2 = x2[randint(1, len(x2), m2)]
covs2 = [cov2] * m2
ws2 = np.ones(m2) / m2

# Run 30 iterations of the EM algorithm to train the two GMM models
for i in range(30):
    plt.plot(x1[:, 0], x1[:, 1], 'r.', x2[:, 0], x2[:, 1], 'b.')
    for w, m, c in zip(ws1, mus1, covs1):
        gellipse(m, c, 100, 'r', lw=round(w * 10))
    for w, m, c in zip(ws2, mus2, covs2):
        gellipse(m, c, 100, 'b', lw=round(w * 10))
    ws1, mus1, covs1, ttl1 = train_gmm(x1, ws1, mus1, covs1)
    ws2, mus2, covs2, ttl2 = train_gmm(x2, ws2, mus2, covs2)
    print('Total log-likelihood: %s for class X1; %s for class X2' % (ttl1, ttl2))
plt.show()

# Hard decision: assign x to class X1 if its log-joint probability beats that of class X2
hard_decision = lambda x: logpdf_gmm(x, ws1, mus1, covs1) + np.log(p1) > logpdf_gmm(x, ws2, mus2, covs2) + np.log(p2)
plot2dfun(hard_decision, ax, 500)
plt.plot(x1[:, 0], x1[:, 1], 'r.')
plt.plot(x2[:, 0], x2[:, 1], 'b.')
for w, m, c in zip(ws1, mus1, covs1):
    gellipse(m, c, 100, 'r', lw=round(w * 10))
for w, m, c in zip(ws2, mus2, covs2):
    gellipse(m, c, 100, 'b', lw=round(w * 10))

# Posterior probability of class X1, obtained from the log-odds via a sigmoid
plt.figure()
x1_posterior = lambda x: logistic_sigmoid(logpdf_gmm(x, ws1, mus1, covs1) + np.log(p1) - logpdf_gmm(x, ws2, mus2, covs2) - np.log(p2))
plot2dfun(x1_posterior, ax, 500)
plt.plot(x1[:, 0], x1[:, 1], 'r.')
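# A minimal sketch of the logistic_sigmoid helper used above, and why the
# sigmoid of the log-odds equals the class posterior: for
# a = log p(x|1)P(1) - log p(x|2)P(2),
# 1 / (1 + e^(-a)) = p(x|1)P(1) / (p(x|1)P(1) + p(x|2)P(2)) = P(1|x).
import numpy as np

def logistic_sigmoid_sketch(a):
    return 1.0 / (1.0 + np.exp(-a))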
# Initialize all variance vectors (diagonals of the full covariance matrices) to
# the same variance vector computed using all the data from the given class
COVs_m = [np.var(train_m, axis=0)] * M_m
# Use a uniform distribution as the initial guess for the weights
Ws_m = np.ones(M_m) / M_m

# Initialize parameters of the female model
M_f = 5
MUs_f = train_f[randint(1, len(train_f), M_f)]
COVs_f = [np.var(train_f, axis=0)] * M_f
Ws_f = np.ones(M_f) / M_f

# Run 30 iterations of the EM algorithm to train the two GMMs on the male and female data
for jj in range(30):
    Ws_m, MUs_m, COVs_m, TTL_m = train_gmm(train_m, Ws_m, MUs_m, COVs_m)
    Ws_f, MUs_f, COVs_f, TTL_f = train_gmm(train_f, Ws_f, MUs_f, COVs_f)
    print('Iteration:', jj, ' Total log-likelihood:', TTL_m, 'for males;', TTL_f, 'for females')

# Now run recognition for all male test utterances
score = []
for tst in test_m:
    ll_m = logpdf_gmm(tst, Ws_m, MUs_m, COVs_m)
    ll_f = logpdf_gmm(tst, Ws_f, MUs_f, COVs_f)
    score.append((sum(ll_m) + np.log(P_m)) - (sum(ll_f) + np.log(P_f)))
print(score)

# The same recognition loop for the female test utterances
score = []
for tst in test_f:
    ll_m = logpdf_gmm(tst, Ws_m, MUs_m, COVs_m)
    ll_f = logpdf_gmm(tst, Ws_f, MUs_f, COVs_f)
    score.append((sum(ll_m) + np.log(P_m)) - (sum(ll_f) + np.log(P_f)))
print(score)
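# A minimal sketch of the logpdf_gmm helper as assumed throughout: per-frame
# GMM log-likelihood via logsumexp over components (diagonal covariances);
# summing its output over frames gives the utterance log-likelihoods used above.
import numpy as np
from scipy.special import logsumexp

def logpdf_gmm_sketch(x, ws, mus, covs):
    return logsumexp([np.log(w)
                      - 0.5 * (np.sum(np.log(2 * np.pi * v))
                               + np.sum((x - m) ** 2 / v, axis=1))
                      for w, m, v in zip(ws, mus, covs)], axis=0)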
def main():
    check_dir(os.path.dirname(negative_test_path))
    check_dir(os.path.dirname(negative_train_path))
    check_dir(os.path.dirname(positive_test_path))
    check_dir(os.path.dirname(positive_train_path))

    train_m = list(wav16khz2mfcc(positive_train_path).values())
    train_f = list(wav16khz2mfcc(negative_train_path).values())
    test_m = list(wav16khz2mfcc(positive_test_path).values())
    test_f = list(wav16khz2mfcc(negative_test_path).values())
    train_m = np.vstack(train_m)
    train_f = np.vstack(train_f)
    dim = train_m.shape[1]

    # PCA: project onto the two eigenvectors of the total covariance matrix
    # with the largest eigenvalues
    cov_tot = np.cov(np.vstack([train_m, train_f]).T, bias=True)
    d, e = scipy.linalg.eigh(cov_tot, eigvals=(dim - 2, dim - 1))
    train_m_pca = train_m.dot(e)
    train_f_pca = train_f.dot(e)
    # Classes are not well separated in the 2D PCA subspace

    # LDA: within-class and across-class covariances; keep the single
    # generalized eigenvector with the largest eigenvalue
    n_m = len(train_m)
    n_f = len(train_f)
    cov_wc = (n_m * np.cov(train_m.T, bias=True) + n_f * np.cov(train_f.T, bias=True)) / (n_m + n_f)
    cov_ac = cov_tot - cov_wc
    d, e = scipy.linalg.eigh(cov_ac, cov_wc, eigvals=(dim - 1, dim - 1))

    # Let's define uniform a-priori probabilities of classes:
    P_m = 0.5
    P_f = 1 - P_m

    # Gaussian models with diagonal covariances: frame-by-frame posterior of
    # the male class for the first male test utterance
    ll_m = logpdf_gauss(test_m[0], np.mean(train_m, axis=0), np.var(train_m, axis=0))
    ll_f = logpdf_gauss(test_m[0], np.mean(train_f, axis=0), np.var(train_f, axis=0))
    posterior_m = np.exp(ll_m) * P_m / (np.exp(ll_m) * P_m + np.exp(ll_f) * P_f)

    # Gaussian models with full covariance matrices. '*' before 'train_gauss'
    # passes both return values (mean and cov) as parameters of 'logpdf_gauss'
    ll_m = logpdf_gauss(test_m[0], *train_gauss(train_m))
    ll_f = logpdf_gauss(test_m[0], *train_gauss(train_f))
    posterior_m = np.exp(ll_m) * P_m / (np.exp(ll_m) * P_m + np.exp(ll_f) * P_f)
    plt.figure()
    plt.plot(posterior_m, 'b')
    plt.plot(1 - posterior_m, 'r')
    plt.figure()
    plt.plot(ll_m, 'b')
    plt.plot(ll_f, 'r')

    # Again Gaussian models with full covariance matrices, now testing a female utterance
    ll_m = logpdf_gauss(test_f[1], *train_gauss(train_m))
    ll_f = logpdf_gauss(test_f[1], *train_gauss(train_f))
    posterior_m = np.exp(ll_m) * P_m / (np.exp(ll_m) * P_m + np.exp(ll_f) * P_f)
    plt.figure()
    plt.plot(posterior_m, 'b')
    plt.plot(1 - posterior_m, 'r')
    plt.figure()
    plt.plot(ll_m, 'b')
    plt.plot(ll_f, 'r')

    # Run recognition for all male test utterances with full-covariance Gaussians
    score = []
    mean_m, cov_m = train_gauss(train_m)
    mean_f, cov_f = train_gauss(train_f)
    for tst in test_m:
        ll_m = logpdf_gauss(tst, mean_m, cov_m)
        ll_f = logpdf_gauss(tst, mean_f, cov_f)
        score.append((sum(ll_m) + np.log(P_m)) - (sum(ll_f) + np.log(P_f)))

    # Run recognition with 1-dimensional LDA-projected data
    score = []
    mean_m, cov_m = train_gauss(train_m.dot(e))
    mean_f, cov_f = train_gauss(train_f.dot(e))
    for tst in test_m:
        ll_m = logpdf_gauss(tst.dot(e), mean_m, np.atleast_2d(cov_m))
        ll_f = logpdf_gauss(tst.dot(e), mean_f, np.atleast_2d(cov_f))
        score.append((sum(ll_m) + np.log(P_m)) - (sum(ll_f) + np.log(P_f)))

    # GMMs: initialize component means with random training frames, variances
    # with the global per-class variance vector, and uniform weights
    M_m = 12
    MUs_m = train_m[randint(1, len(train_m), M_m)]
    COVs_m = [np.var(train_m, axis=0)] * M_m
    Ws_m = np.ones(M_m) / M_m
    M_f = 7
    MUs_f = train_f[randint(1, len(train_f), M_f)]
    COVs_f = [np.var(train_f, axis=0)] * M_f
    Ws_f = np.ones(M_f) / M_f

    # Run 100 iterations of the EM algorithm to train the two GMMs
    for jj in range(100):
        Ws_m, MUs_m, COVs_m, TTL_m = train_gmm(train_m, Ws_m, MUs_m, COVs_m)
        Ws_f, MUs_f, COVs_f, TTL_f = train_gmm(train_f, Ws_f, MUs_f, COVs_f)

    # Score all male (target) test utterances and count correct decisions
    score = []
    testok = 0
    testnok = 0
    for tst in test_m:
        ll_m = logpdf_gmm(tst, Ws_m, MUs_m, COVs_m)
        ll_f = logpdf_gmm(tst, Ws_f, MUs_f, COVs_f)
        scr = (sum(ll_m) + np.log(P_m)) - (sum(ll_f) + np.log(P_f))
        score.append(scr)
        if scr >= 0:
            testok += 1
        else:
            testnok += 1
    print("target accuracy is " + str(testok / (testok + testnok)))
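    # For reference, a minimal sketch of the train_gauss helper assumed above:
    # maximum-likelihood mean and full covariance of one class ('*' unpacks the
    # returned (mean, cov) pair straight into logpdf_gauss). Defined inline
    # only as an illustration; the real helper comes from the library.
    def train_gauss_sketch(x):
        return np.mean(x, axis=0), np.cov(x.T, bias=True)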
    # Score all female (non-target) test utterances and count correct decisions
    score = []
    testok = 0
    testnok = 0
    for tst in test_f:
        ll_m = logpdf_gmm(tst, Ws_m, MUs_m, COVs_m)
        ll_f = logpdf_gmm(tst, Ws_f, MUs_f, COVs_f)
        scr = (sum(ll_m) + np.log(P_m)) - (sum(ll_f) + np.log(P_f))
        score.append(scr)
        if scr < 0:
            testok += 1
        else:
            testnok += 1
    print("non-target accuracy is " + str(testok / (testok + testnok)))

    # Save the trained model parameters for later use
    print('Saved as "GMM_model.pkl"')
    with open('GMM_model.pkl', 'wb') as f:
        pickle.dump([Ws_m, MUs_m, COVs_m, Ws_f, MUs_f, COVs_f], f)
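# A hypothetical counterpart for later use: loading the saved parameters back
# in the same order they were dumped above.
import pickle

with open('GMM_model.pkl', 'rb') as f:
    Ws_m, MUs_m, COVs_m, Ws_f, MUs_f, COVs_f = pickle.load(f)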
# Initialize per-class GMM parameters; component means are random training frames
mus = [None] * len(train_sample)
covs = [None] * len(train_sample)
ws = [None] * len(train_sample)
for index in range(len(train_sample)):
    mus[index] = train_sample[index][randint(1, len(train_sample[index]), m)]
    # Initialize all covariance matrices to the same covariance matrix computed
    # using all the data from the given class
    covs[index] = [cov[index]] * m
    ws[index] = np.ones(m) / m

ttl = [None] * len(train_sample)
# Run 110 iterations of the EM algorithm to train the GMM models
for i in range(110):
    for index in range(len(train_sample)):
        ws[index], mus[index], covs[index], ttl[index] = train_gmm(
            train_sample[index], ws[index], mus[index], covs[index])

# Classify each test utterance to the class with the highest log-joint probability
ll_c = [None] * len(train_sample)
f = open("gmm_speech.txt", "w")
for test in range(len(real_test_sample)):
    for index in range(len(train_sample)):
        ll_c[index] = sum(logpdf_gmm(real_test_sample[test], ws[index],
                                     mus[index], covs[index])) + np.log(P_c[index])
    ll_index_max = np.argmax(ll_c)
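# A hypothetical sketch of how the per-class 'cov' list referenced in the
# initialization above could be built: one full covariance matrix estimated
# from all the training data of each class.
import numpy as np

def class_covariances(train_sample):
    return [np.cov(cls.T, bias=True) for cls in train_sample]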
COVs = []
for i in range(NUM_CLASSES):
    id = i + 1
    print("Loading data for class {}".format(id))
    train = np.vstack(wav16khz2mfcc("train/{}".format(id)).values())
    print("Training model for class {}".format(id))
    # Initialize a 32-component GMM for this class and run 15 EM iterations
    M.append(32)
    Ws.append(np.ones(M[i]) / M[i])
    MUs.append(train[np.random.randint(1, len(train), M[i])])
    COVs.append([np.var(train, axis=0)] * M[i])
    n = 15
    for iteration in range(n):
        [Ws[i], MUs[i], COVs[i], TTL] = train_gmm(train, Ws[i], MUs[i], COVs[i])
        print("Training iteration: {}/{}, total log-likelihood: {}".format(iteration + 1, n, TTL))

# Evaluate on the dev set: pick the class with the highest total log-likelihood
errors = 0
trials = 0
for i in range(NUM_CLASSES):
    id = i + 1
    test = list(wav16khz2mfcc("dev/{}".format(id)).values())
    for j, test_data in enumerate(test):
        log_lh = []
        for ii in range(NUM_CLASSES):
            log_lh.append(sum(logpdf_gmm(test_data, Ws[ii], MUs[ii], COVs[ii])))
        winning_class_ind = np.argmax(log_lh)
        print("Correct class {} | Winning class {} with value {}".format(
            i + 1, winning_class_ind + 1, log_lh[winning_class_ind]))
        trials += 1
        if winning_class_ind != i:
            errors += 1
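# A small hypothetical helper for the summary one would typically print after
# the evaluation loop above (the 'errors'/'trials' counters come from that loop).
def report_error_rate(errors, trials):
    print("Dev errors: {}/{} ({:.1f} %)".format(errors, trials,
                                                100.0 * errors / trials))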
# Target / non-target GMMs with uniform priors and 64 components each
P_t = 0.5
M_t = 64
MUs_t = t_train_sound[numpy.random.randint(1, len(t_train_sound), M_t)]
COVs_t = [numpy.var(t_train_sound, axis=0)] * M_t
Ws_t = numpy.ones(M_t) / M_t

P_nt = 1 - P_t
M_nt = M_t
MUs_nt = nt_train_sound[numpy.random.randint(1, len(nt_train_sound), M_nt)]
COVs_nt = [numpy.var(nt_train_sound, axis=0)] * M_nt
Ws_nt = numpy.ones(M_nt) / M_nt

# Run 32 iterations of the EM algorithm to train both GMMs
n = 32
for i in range(n):
    [Ws_t, MUs_t, COVs_t, TTL_t] = train_gmm(t_train_sound, Ws_t, MUs_t, COVs_t)
    [Ws_nt, MUs_nt, COVs_nt, TTL_nt] = train_gmm(nt_train_sound, Ws_nt, MUs_nt, COVs_nt)
    print("Training iteration: " + str(i + 1) + "/" + str(n))

# Write one line per evaluation file: name, log-odds score and hard decision
with open("GMM_speech_results", "w") as f:
    for i, eval in enumerate(eval_sound):
        ll_t = logpdf_gmm(eval, Ws_t, MUs_t, COVs_t)
        ll_nt = logpdf_gmm(eval, Ws_nt, MUs_nt, COVs_nt)
        val = (sum(ll_t) + numpy.log(P_t)) - (sum(ll_nt) + numpy.log(P_nt))
        f.write(eval_sound_names[i] + " " + str(val) + " " + ("1" if val > 0 else "0") + "\n")
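# A minimal follow-up sketch: the log-odds score 'val' written above can be
# mapped to a posterior probability of the target class with a sigmoid
# (this conversion is an addition, not part of the original output format).
import numpy

def target_posterior(val):
    # val = (log p(x|t) + log P_t) - (log p(x|nt) + log P_nt)
    return 1.0 / (1.0 + numpy.exp(-val))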