# Example no. 1
allseqs = np.array_split(alldata,250)
datas, heldout = hold_out(allseqs,0.05)
training_size = sum(data.shape[0] for data in datas)
print '...done!'

print '%d total frames' % sum(data.shape[0] for data in alldata)
print 'split into %d training and %d test sequences' % (len(datas),len(heldout))

### inference!

Nmax = 20
obs_hypparams = dict(mu_0=np.zeros(2),sigma_0=np.eye(2),kappa_0=0.2,nu_0=5)

hmm = models.HMM(
        obs_distns=[distributions.Gaussian(**obs_hypparams) for i in range(Nmax)],
        alpha=10.,init_state_concentration=1.)

scores = []
sgdseq = sgd_onepass(tau=0,kappa=0.7,datalist=datas)
for t, (data, rho_t) in progprint(enumerate(sgdseq)):
    hmm.meanfield_sgdstep(data, data.shape[0] / training_size, rho_t)

    if t % 10 == 0:
        scores.append(hmm.log_likelihood(heldout))

plt.figure()
plt.plot(scores)

plt.show()

# NOTE(review): this fragment begins mid-conditional -- the matching 'if'
# (presumably the cache-hit branch) is not visible here; confirm upstream.
else:
    # Cache miss: load the preprocessed dataset from disk, slice out the
    # test portion, and pickle it so subsequent runs can skip this step.
    f = np.load('/scratch/TMT_50p_5-8-13_processed_notpca.npz')
    data = f['data']
    test_data = data[test_slice]
    with open(test_data_cache_path,'w') as outfile:
        # protocol=-1 selects the highest pickle protocol available
        cPickle.dump(test_data,outfile,protocol=-1)
    print 'saved to cache %s' % test_data_cache_path

print '...done'

#############
#  Running  #
#############

# For each model family, average predictive likelihoods at the requested
# lookahead horizons across that family's fitted instances.
likelihoods = collections.OrderedDict()
for name, ms in progprint(models.iteritems()):
    likelihoods[name] = \
            np.asarray([m.predictive_likelihoods(test_data,lookaheads)
                for m in ms]).mean(0)

# Plot mean log predictive likelihood against prediction horizon, one line
# per model family (name[:-1] strips the trailing plural 's' for the label).
plt.figure()
for name, ls in likelihoods.iteritems():
    plt.plot(lookaheads,np.asarray(map(np.mean,ls)),'x-',label=name[:-1])

    # means = np.asarray(map(np.mean,ls)) - gmm_likes
    # stds = np.asarray([np.std(l - g) for l,g in zip(ls,gmm_likes)])
    # plt.errorbar(lookaheads,means,stds,label=name[:-1])
plt.xlabel('steps into future')
plt.ylabel('log predictive likelihood')
plt.legend()
# Example no. 3
# for data, changepoints in zip(datas[-2:], changepointss[-2:]):
#     model.add_data(data,changepoints=changepoints)

# for itr in progprint_xrange(25):
#     model.resample_model()

# plt.figure()
# model.plot()
# plt.gcf().suptitle('sampled')

# model.states_list = []

# svi

scores = []
# Single SVI pass over (sequence, changepoints) pairs; step sizes follow
# rho_t = (t + tau)^(-kappa) with tau=0, kappa=0.6.
for t, ((data,changepoints), rho_t) in progprint(
        sgd_manypass(0,0.6,zip(*[datas,changepointss]),npasses=1)):
    # BUGFIX: force float division for the minibatch scaling factor.  Under
    # Python 2, minibatchsize(data) / training_size truncates to 0 when both
    # are ints, nullifying every SVI update.
    model.meanfield_sgdstep(
            data, minibatchsize(data) / float(training_size), rho_t,
            changepoints=changepoints)

# decode the last two just to take a look
for data, changepoints in zip(*[datas[-2:],changepointss[-2:]]):
    model.add_data(data,changepoints=changepoints)
    # add_data appends a states object; run mean-field Viterbi on it
    s = model.states_list[-1]
    s.mf_Viterbi()

plt.figure()
model.plot()
plt.gcf().suptitle('fit')
plt.matshow(np.vstack(
# Example no. 4
        weight_prior_mean = 0, weight_prior_std = 0.1, win_size = win_size,  use_obs_features = use_obs_features)



# Fix the inference RNG so runs are reproducible.
np.random.seed(infseed)
if sgd_or_mf == 'mf':
    # Batch mean field: load all 14 training sequences into the model and
    # run 20 coordinate-ascent sweeps, printing the variational objective.
    print('feature weights before mean field: ', '\n', HDPHMMSVImodel.feature_weights, '\n')
    for i in range(14):
        HDPHMMSVImodel.add_data(datas[i])
    for i in range(20):
        print(HDPHMMSVImodel.meanfield_coordinate_descent_step(0.5))
    print('feature weights after mean field: ', '\n', HDPHMMSVImodel.feature_weights, '\n')
else:
    # Stochastic variational inference over minibatches of 4 sequences.
    scores = []
    sgdseq = sgd_passes(tau=0.8,kappa=0.9,datalist=datas, minibatchsize=4,npasses=30) #4, 3
    for t, (data, rho_t) in progprint(enumerate(sgdseq)):
        # FIX: np.float was deprecated in NumPy 1.20 and removed in 1.24
        # (AttributeError on modern NumPy); the builtin float() is the
        # drop-in replacement for casting the denominator.
        HDPHMMSVImodel.meanfield_sgdstep(data, np.array(data).shape[0] / float(training_size), rho_t)
        score = HDPHMMSVImodel.log_likelihood(heldout)
        # print 'feature weights after mean field: ', HDPHMMSVImodel.feature_weights
        print(score)
        print("")
        if t % 1 == 0:  # records every step; threshold kept for easy thinning
            scores.append(score)
    # plt.plot(scores)
    # plt.show()



######################################Plotting the states and segments ###########################

# Third panel of a 4-row figure (states/segments plot).
f3 = plt.subplot(4, 1, 3)
# NOTE(review): the 'else:' below has no visible matching 'if' -- this
# fragment is truncated; confirm the cache-hit branch upstream.
else:
    # Cache miss: load the preprocessed dataset from disk, slice out the
    # test portion, and pickle it so subsequent runs can skip this step.
    f = np.load('/scratch/TMT_50p_5-8-13_processed_notpca.npz')
    data = f['data']
    test_data = data[test_slice]
    with open(test_data_cache_path,'w') as outfile:
        # protocol=-1 selects the highest pickle protocol available
        cPickle.dump(test_data,outfile,protocol=-1)
    print 'saved to cache %s' % test_data_cache_path

print '...done'

#############
#  Running  #
#############

# For each model family, average predictive likelihoods at the requested
# lookahead horizons across that family's fitted instances.
likelihoods = collections.OrderedDict()
for name, ms in progprint(models.iteritems()):
    likelihoods[name] = \
            np.asarray([m.predictive_likelihoods(test_data,lookaheads)
                for m in ms]).mean(0)

# Plot each family's mean log predictive likelihood relative to the GMM
# baseline, against prediction horizon (name[:-1] strips the plural 's').
plt.figure()
gmm_likes = np.asarray(map(np.mean,likelihoods['GMMs']))
for name, ls in likelihoods.iteritems():
    plt.plot(lookaheads,np.asarray(map(np.mean,ls)) - gmm_likes,'x-',label=name[:-1])

    # means = np.asarray(map(np.mean,ls)) - gmm_likes
    # stds = np.asarray([np.std(l - g) for l,g in zip(ls,gmm_likes)])
    # plt.errorbar(lookaheads,means,stds,label=name[:-1])
plt.xlabel('steps into future')
plt.ylabel('log predictive likelihood')
plt.legend()
# Example no. 6
allseqs = np.array_split(alldata,250)
datas, heldout = hold_out(allseqs,0.05)
training_size = sum(data.shape[0] for data in datas)
print '...done!'

print '%d total frames' % sum(data.shape[0] for data in alldata)
print 'split into %d training and %d test sequences' % (len(datas),len(heldout))

### inference!

Nmax = 20
obs_hypparams = dict(mu_0=np.zeros(2),sigma_0=np.eye(2),kappa_0=0.2,nu_0=5)

hmm = models.DATruncHDPHMM(
        obs_distns=[distributions.Gaussian(**obs_hypparams) for i in range(Nmax)],
        alpha=10.,gamma=10.,init_state_concentration=1.)

scores = []
stepsizes = sgd_steps(tau=0,kappa=0.7,nsteps=len(datas))
for t, (data, rho_t) in progprint(enumerate(zip(datas,stepsizes))):
    hmm.meanfield_sgdstep(data, data.shape[0] / training_size, rho_t)

    if t % 10 == 0:
        scores.append(hmm.log_likelihood(heldout))

plt.figure()
plt.plot(scores)

plt.show()