Code example #1
    def test_score_samples_and_decode(self):
        h = hmm.GaussianHMM(self.n_components, self.covariance_type)
        h.means_ = self.means
        h.covars_ = self.covars[self.covariance_type]

        # Make sure the means are far apart so posteriors.argmax()
        # picks the actual component used to generate the observations.
        h.means_ = 20 * h.means_

        gaussidx = np.repeat(np.arange(self.n_components), 5)
        nobs = len(gaussidx)
        obs = self.prng.randn(nobs, self.n_features) + h.means_[gaussidx]

        ll, posteriors = h.score_samples(obs)

        self.assertEqual(posteriors.shape, (nobs, self.n_components))
        assert_array_almost_equal(posteriors.sum(axis=1), np.ones(nobs))

        viterbi_ll, stateseq = h.decode(obs)
        assert_array_equal(stateseq, gaussidx)
Code example #2
    def test_fit_with_priors(self, params='stmc', n_iter=10, verbose=False):
        startprob_prior = 10 * self.startprob + 2.0
        transmat_prior = 10 * self.transmat + 2.0
        means_prior = self.means
        means_weight = 2.0
        covars_weight = 2.0
        if self.covariance_type in ('full', 'tied'):
            covars_weight += self.n_features
        covars_prior = self.covars[self.covariance_type]

        h = hmm.GaussianHMM(self.n_components, self.covariance_type)
        h.startprob_ = self.startprob
        h.startprob_prior = startprob_prior
        h.transmat_ = hmm.normalize(
            self.transmat + np.diag(self.prng.rand(self.n_components)), 1)
        h.transmat_prior = transmat_prior
        h.means_ = 20 * self.means
        h.means_prior = means_prior
        h.means_weight = means_weight
        h.covars_ = self.covars[self.covariance_type]
        h.covars_prior = covars_prior
        h.covars_weight = covars_weight

        # Create training data by sampling from the HMM.
        train_obs = [h.sample(n=10)[0] for _ in range(10)]

        # Mess up the parameters and see if we can re-learn them.
        h.fit(train_obs[:1], n_iter=0)

        trainll = train_hmm_and_keep_track_of_log_likelihood(h,
                                                             train_obs,
                                                             n_iter=n_iter,
                                                             params=params)[1:]

        # Check that the log-likelihood is always increasing during training
        if not np.all(np.diff(trainll) > 0) and verbose:
            print()
            print('Test MAP train: %s (%s)\n  %s\n  %s' %
                  (self.covariance_type, params, trainll, np.diff(trainll)))
        # XXX: Why such a large tolerance?
        self.assertTrue(np.all(np.diff(trainll) > -0.5))
Code example #3
def make_model_metric_pickle(state1_min_mean, state1_max_mean, state1_min_cov,
                             state1_max_cov, num_iter, num_test_houses):
    devices_types = get_test_data(num_test_houses)
    errors_mean_cov = []
    error_dict = {}
    device_type_name = 'air1'
    state1_means = np.linspace(state1_min_mean, state1_max_mean, num_iter)
    state1_covs = np.linspace(state1_min_cov, state1_max_cov, num_iter)
    best_error = 100000000
    for state1_mean in state1_means:
        errors_cov = []
        for state1_cov in state1_covs:
            print(str(state1_mean) + ", " + str(state1_cov))
            pi_prior = np.array([0.9, 0.1])
            a_prior = np.array([[0.95, 0.05], [0.05, 0.95]])
            mean_prior = np.array([[0], [state1_mean]])
            cov_prior = np.array([[[0.0001]], [[state1_cov]]])
            model = hmm.GaussianHMM(pi_prior.size, 'full', pi_prior, a_prior)
            model.means_ = mean_prior
            model.covars_ = cov_prior
            error = get_model_error_from_trace0(
                devices_types[device_type_name].instances, model)
            if error < best_error:
                best_mean = state1_mean
                best_cov = state1_cov
                best_error = error
            errors_cov.append(error)
        errors_mean_cov.append(errors_cov)
    error_dict['error_vals'] = errors_mean_cov
    error_dict['cov_vals'] = state1_covs
    error_dict['mean_vals'] = state1_means
    error_dict['best_mean'] = best_mean
    error_dict['best_cov'] = best_cov
    error_dict['best_error'] = best_error
    with open(
            'error_m_' + str(state1_min_mean) + '_' + str(state1_max_mean) +
            '_c_' + str(state1_min_cov) + '_' + str(state1_max_cov) + '.pkl',
            'wb') as f:  # pickle needs a binary-mode file handle
        pickle.dump(error_dict, f)
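
The pickle written above can be loaded back for inspection or plotting. A minimal sketch; the file name is hypothetical and must match the mean/cov ranges actually passed in:

import pickle

# Hypothetical file name following the pattern written above; substitute
# the ranges that were actually used.
with open('error_m_0.0_2000.0_c_1.0_500.0.pkl', 'rb') as f:
    error_dict = pickle.load(f)

print(error_dict['best_mean'], error_dict['best_cov'], error_dict['best_error'])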
Code example #4
def init_HMM(pi_prior, a_prior, mean_prior, cov_prior):
    '''
    Initializes a GaussianHMM model from prior parameters.

    pi_prior is the starting probability of the HMM
    a_prior is the transition matrix of the HMM
    mean_prior is the initial mean value of each state
    cov_prior is the initial covariance of each state

    For an n-state HMM with f-dimensional observations:

    * pi_prior is a 1-D numpy array of size n
    * a_prior is a 2-D numpy array of size n x n
    * mean_prior is a 2-D numpy array of size n x f (one mean vector
      per state)
    * cov_prior is a 3-D numpy array of size n x f x f (one covariance
      matrix per state), e.g. np.tile(1.0, (n, 1, 1)) for 1-D
      observations with a uniform starting covariance.
    '''
    model = hmm.GaussianHMM(pi_prior.size, 'full', pi_prior, a_prior)
    model.means_ = mean_prior
    model.covars_ = cov_prior
    return model
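
A usage sketch for init_HMM, assuming a two-state appliance model with 1-D observations; the numbers are illustrative, echoing the A/C parameters used elsewhere on this page:

import numpy as np

# Two states: "off" near 0 and "on" near 1500, 1-D observations.
pi_prior = np.array([0.9, 0.1])
a_prior = np.array([[0.95, 0.05], [0.05, 0.95]])
mean_prior = np.array([[0.0], [1500.0]])
cov_prior = np.array([[[0.0001]], [[100.0]]])

model = init_HMM(pi_prior, a_prior, mean_prior, cov_prior)
obs, states = model.sample(4 * 24)  # one simulated day at 15-minute resolution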
Code example #5
import numpy as np
from sklearn import hmm  # old sklearn.hmm module (removed in scikit-learn 0.17)
import matplotlib.pyplot as plt

# The initial state occupation distribution. The excerpt used start_prob
# without defining it, so a plausible value is supplied here.
start_prob = np.array([0.6, 0.3, 0.1, 0.0])

# The transition matrix; note that no transitions are possible between
# components 1 and 3, nor between components 2 and 4
trans_mat = np.array([[0.7, 0.2, 0.0, 0.1], [0.3, 0.5, 0.2, 0.0],
                      [0.0, 0.3, 0.5, 0.2], [0.2, 0.0, 0.2, 0.6]])
# The means of each component
means = np.array([
    [0.0, 0.0],
    [0.0, 11.0],
    [9.0, 10.0],
    [11.0, -1.0],
])
# The covariance of each component
covars = .5 * np.tile(np.identity(2), (4, 1, 1))

# Build an HMM instance and set parameters
model = hmm.GaussianHMM(4, "full", start_prob, trans_mat, random_state=42)

# Instead of fitting it from the data, we directly set the estimated
# parameters, the means and covariance of the components
model.means_ = means
model.covars_ = covars
###############################################################

# Generate samples
X, Z = model.sample(500)

# Plot the sampled data
plt.plot(X[:, 0],
         X[:, 1],
         "-o",
         label="observations")
plt.legend(loc="best")
plt.show()
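
Since the parameters were set by hand rather than fitted, decoding the samples should recover the generating states almost exactly; a quick sanity check under that assumption:

# Decode the sampled observations and compare with the true state sequence.
logprob, recovered_states = model.decode(X)
print("fraction of states recovered: %.2f" % np.mean(recovered_states == Z))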
Code example #6
    def test_bad_covariance_type(self):
        # A valid covariance type must construct without error...
        hmm.GaussianHMM(20, self.covariance_type)
        # ...while an unknown one must raise ValueError.
        self.assertRaises(ValueError, hmm.GaussianHMM, 20,
                          'badcovariance_type')
Code example #7
# Footprint color bed output
if hmmType == "8":
    stateNameDict = {0: "BACK", 1: "UPH", 2: "TOPH", 3: "DOWNH",
                     4: "UPD", 5: "TOPD", 6: "DOWND", 7: "FP"}
    colorDict = {0: "50,50,50", 1: "110,250,110", 2: "90,180,240",
                 3: "255,80,90", 4: "10,80,0", 5: "20,40,150",
                 6: "150,20,40", 7: "198,150,0"}
elif hmmType == "4":
    stateNameDict = {0: "BACK", 1: "HH", 2: "DH", 3: "FP"}
    colorDict = {0: "50,50,50", 1: "90,180,240", 2: "10,80,0", 3: "198,150,0"}

##################################################
### Applying HMM and creating posteriorList
##################################################

# Creating hmm
hmmStates, dimNo, startprob, transmat, means, covars = hmmFunctions.createHMM(hmmFileName,returnMode="sci")
if covarType == "diag":
    covars = aux.diagonalize(covars)
hmm = shmm.GaussianHMM(n_components=hmmStates, covariance_type=covarType,
                       transmat=np.array(transmat), startprob=np.array(startprob))
hmm.means_ = np.array(means)
hmm.covars_ = np.array(covars)

# Opening signals
signalFileList = []
signalBwList = []
for signalFileName in signalList:
    signalFileList.append(open(signalFileName,"r"))
    signalBwList.append(BigWigFile(signalFileList[-1]))

# Create output files list
coordName = coordFileName.split("/")[-1].split(".")[0]
outputBedFile = open(outputFileName,"w")

# Iterating on coordinate file
coordFile = open(coordFileName,"r")
for line in coordFile:
Code example #8
def main():

    #### Data ####

    # fake A/C params
    pi = np.array([0.1, 0.9])
    a = np.array([[0.95, 0.05], [0.05, 0.95]])
    mean = np.array([[0], [1500]])
    cov = np.array([[[1.0]], [[10.0]]])
    model = hmm.GaussianHMM(pi.size, "full", pi, a)
    model.means_ = mean
    model.covars_ = cov

    # randomly sample one day of data
    length = 4 * 24
    power, state = model.sample(length)

    #### CNN ####
    learning_rate = 0.1
    rng = np.random.RandomState(23455)

    ishape = (length, 1)  # this is the size of our input data
    batch_size = 20  # size of the minibatch

    # allocate symbolic variables for the data
    x = T.matrix('x')  # generated power levels
    y = T.lvector('y')  # generate states

    ##############################
    # BEGIN BUILDING ACTUAL MODEL
    ##############################

    # Reshape matrix of rasterized images of shape (batch_size,length,1)
    # to a 4D tensor, compatible with our ConvPoolLayer
    layer0_input = x.reshape((batch_size,1,length,1))

    # Construct the first convolutional pooling layer:
    # filtering reduces the input size to (96-3+1, 1-1+1) = (94, 1)
    # maxpooling reduces this further to (94/2, 1/1) = (47, 1)
    # 4D output tensor is thus of shape (20, 20, 47, 1)
    # (poolsize is (2, 1) so the pooled length matches the 47 used below)
    layer0 = LeNetConvPoolLayer(rng, input=layer0_input,
            image_shape=(batch_size, 1, length, 1),
            filter_shape=(20, 1, 3, 1), poolsize=(2, 1))

    # Construct the second convolutional pooling layer
    # filtering reduces the image size to (12 - 5 + 1, 12 - 5 + 1)=(8, 8)
    # maxpooling reduces this further to (8/2,8/2) = (4, 4)
    # 4D output tensor is thus of shape (20,50,4,4)
    #layer1 = LeNetConvPoolLayer(rng, input=layer0.output,
    #        image_shape=(batch_size, 20, 12, 12),
    #        filter_shape=(50, 20, 5, 5), poolsize=(2, 2))

    # The HiddenLayer is fully connected, so it operates on 2D matrices of
    # shape (batch_size, num_pixels) (i.e. a matrix of rasterized images).
    # This will generate a matrix of shape (20, 20 * 47 * 1)
    layer2_input = layer0.output.flatten(2)

    # construct a fully-connected sigmoidal layer
    layer2 = HiddenLayer(rng, input=layer2_input,
                         n_in=20*47*1, n_out=50,
                         activation=T.tanh    )

    # classify the values of the fully-connected sigmoidal layer
    layer3 = LogisticRegression(input=layer2.output, n_in=50, n_out=1)


    # the cost we minimize during training is the NLL of the model
    cost = layer3.negative_log_likelihood(y)

    # create a function to compute the mistakes that are made by the model
    test_model = theano.function([x, y], layer3.errors(y))

    # create a list of all model parameters to be fit by gradient descent
    # (layer1 is commented out above, so its parameters are omitted)
    params = layer3.params + layer2.params + layer0.params

    # create a list of gradients for all model parameters
    grads = T.grad(cost, params)

    # train_model is a function that updates the model parameters by SGD
    # Since this model has many parameters, it would be tedious to manually
    # create an update rule for each model parameter. We thus create the updates
    # dictionary by automatically looping over all (params[i],grads[i])  pairs.
    updates = []
    for param_i, grad_i in zip(params, grads):
        updates.append((param_i, param_i - learning_rate * grad_i))
    # minibatch index variable; defined here since the excerpt omits it
    # (train_set_x and train_set_y are assumed here; see the sketch
    # after this example)
    index = T.lscalar('index')
    train_model = theano.function([index], cost, updates=updates,
            givens={
                x: train_set_x[index * batch_size: (index + 1) * batch_size],
                y: train_set_y[index * batch_size: (index + 1) * batch_size]})
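
The excerpt leaves `train_set_x` and `train_set_y` undefined; they presumably come from surrounding code. One way to build them is to sample many traces from the HMM above and wrap the results as Theano shared variables, following the usual tutorial pattern. A sketch under that assumption (the labeling scheme is hypothetical):

def make_shared_dataset(model, n_traces, length):
    # Sample n_traces power traces from the HMM; use the final hidden
    # state of each trace as a (hypothetical) classification target.
    xs, ys = [], []
    for _ in range(n_traces):
        power, state = model.sample(length)
        xs.append(power.ravel())
        ys.append(state[-1])
    shared_x = theano.shared(np.asarray(xs, dtype=theano.config.floatX),
                             borrow=True)
    shared_y = theano.shared(np.asarray(ys, dtype=theano.config.floatX),
                             borrow=True)
    # Labels are cast to int32, as the logistic output layer expects
    # integer targets.
    return shared_x, T.cast(shared_y, 'int32')

train_set_x, train_set_y = make_shared_dataset(model, n_traces=1000,
                                               length=length)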
Code example #9
import numpy as np
from sklearn import hmm

startprob = np.array([0.6, 0.3, 0.1])
transmat = np.array([[0.7, 0.2, 0.1], [0.3, 0.5, 0.2], [0.3, 0.3, 0.4]])
means = np.array([[0.0, 0.0], [3.0, -3.0], [5.0, 10.0]])
covars = np.tile(np.identity(2), (3, 1, 1))
model = hmm.GaussianHMM(3, "full", startprob, transmat)
model.means_ = means
model.covars_ = covars
X, Z = model.sample(100)
print(X)

model.fit([X])
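
Note that sklearn.hmm was deprecated in scikit-learn 0.16 and removed in 0.17; its maintained successor is the hmmlearn package. A sketch of the same example against hmmlearn's API:

import numpy as np
from hmmlearn import hmm

model = hmm.GaussianHMM(n_components=3, covariance_type="full")
model.startprob_ = np.array([0.6, 0.3, 0.1])
model.transmat_ = np.array([[0.7, 0.2, 0.1],
                            [0.3, 0.5, 0.2],
                            [0.3, 0.3, 0.4]])
model.means_ = np.array([[0.0, 0.0], [3.0, -3.0], [5.0, 10.0]])
model.covars_ = np.tile(np.identity(2), (3, 1, 1))

X, Z = model.sample(100)
print(X)

# hmmlearn fits on a single concatenated array plus per-sequence lengths
model.fit(X, lengths=[100])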
Code example #10
def stkHMM(lrndata, n_components):
    model = lrn.GaussianHMM(n_components, covariance_type="tied", n_iter=20)
    model.fit([lrndata])

    hidden_states = model.predict(lrndata)
    return [model, hidden_states]
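
Here `lrn` is presumably an import alias for the hmm module. A hypothetical call on a single feature column, shaped (n_samples, 1) as the old API expects:

import numpy as np

# Hypothetical input: e.g. 500 daily log returns as one feature column.
lrndata = np.random.randn(500, 1)

model, hidden_states = stkHMM(lrndata, n_components=3)
print(np.bincount(hidden_states))  # number of samples assigned to each regime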
Code example #11
def createHMM(start_prob, trans_mat, means, covars):
    model = hmm.GaussianHMM(4, 'full', start_prob, trans_mat, random_state=42)
    model.means_ = means
    model.covars_ = covars
    return model, model.means_, model.covars_
Code example #12
def fake_hmm_appliance(pi, a, mean, cov):
    model = hmm.GaussianHMM(pi.size, "full", pi, a)
    model.means_ = mean
    model.covars_ = cov
    return model
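
This mirrors the fake A/C setup in code example #8; a usage sketch with the same illustrative parameters:

import numpy as np

ac = fake_hmm_appliance(pi=np.array([0.1, 0.9]),
                        a=np.array([[0.95, 0.05], [0.05, 0.95]]),
                        mean=np.array([[0], [1500]]),
                        cov=np.array([[[1.0]], [[10.0]]]))
power, state = ac.sample(4 * 24)  # one simulated day at 15-minute resolution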
Code example #13
def main():

    if dimension == 1:
#        gmm = np.zeros(number_of_components*size)
#        mu = np.zeros(number_of_components)
#        sigma = np.zeros(number_of_components)
#        for i in range(number_of_components) :
#            gmm[i*size:(i+1)*size], mu[i], sigma[i] = create_data(dimension,size,i)
        gmm = np.zeros((1,number_of_components*size),dtype=float)
        mu = np.zeros((number_of_components,1),dtype=float)
        sigma = np.zeros((number_of_components,1,1),dtype=float)
        matrix = np.zeros((number_of_components,number_of_components),dtype=float)

#        for i in range(number_of_components):
#            x, mu[i,0], sigma[i,0,0] = create_data(dimension,size,i)
    else:
        gmm = np.zeros((dimension,number_of_components*size),dtype=float)
        mu = np.zeros((number_of_components,dimension),dtype=float)
        sigma = np.zeros((number_of_components,dimension,dimension),dtype=float)
        matrix = np.zeros((number_of_components,number_of_components),dtype=float)

#        for i in range(number_of_components):
#            x, mu[i,:], sigma[i,:,:] = create_data(dimension,size,i)

    weights = np.array([0.6, 0.4])
    matrix = np.array([[0.7, 0.3], [0.1, 0.9]])
    model = hmm.GaussianHMM(2, "full", weights, matrix)
    model.means_ = mu
    model.covars_ = sigma
    gmm, Z = model.sample(number_of_components*size)

#    else :
#        gmm = np.zeros((dimension,number_of_components*size))
#        mu = np.zeros((number_of_components,dimension))
#        sigma = np.zeros((number_of_components,dimension,dimension))
#        for i in range(number_of_components) :
#            gmm[:,i*size:(i+1)*size], mu[i,:], sigma[i,:,:] = create_data(dimension,size,i)

    means, variances, pi, a = emHMM_algorithm(gmm,dimension,number_of_components,number_of_components*size)

#    num_bins = 50
#    n, bins, patches = plt.hist(gmm, num_bins, normed=1, facecolor='green', alpha=0.5)
#    # add a 'best fit' line
#    for i in range(number_of_components) :
#        y = mlab.normpdf(bins, means[i], variances[i])
#        plt.plot(bins, y, 'r--')
#        plt.xlabel('Values')
#        plt.ylabel('Probability')
#        plt.title('Data Histogram vs predicted distribution')
#
#    # Tweak spacing to prevent clipping of ylabel
#    plt.subplots_adjust(left=0.15)
#    plt.show()

    b = np.zeros((number_of_components,number_of_components*size))

    #Evaluate posterior
    if dimension == 1:
        for i in range(number_of_components):
            # Calculate the probability of seeing the observation given each state
            pdf = pi[i] * mlab.normpdf(gmm, means[i], variances[i, 0])
            b[i, :] = pdf[:, 0]

    else:
        centered_data = np.zeros((number_of_components, number_of_components*size, dimension))
        den = np.zeros((number_of_components, number_of_components*size))
        num = np.zeros((number_of_components, number_of_components*size))
        for i in range(number_of_components):
            # Calculate the probability of seeing the observation given each state
            for n in range(number_of_components*size):
                centered_data[i, n, :] = gmm[n, :] - means[i, :]
                den[i, n] = np.sqrt((2*math.pi)**dimension *
                                    np.linalg.det(variances[i, :, :]))
                # -0.5 rather than -1/2, which floors to -1 under Python 2
                # integer division
                num[i, n] = np.exp(-0.5 * np.dot(
                    np.dot(centered_data[i, n, :][np.newaxis],
                           np.linalg.inv(variances[i, :, :])),
                    centered_data[i, n, :][:, np.newaxis]))
                b[i, n] = num[i, n] / den[i, n]


    # Predict the state sequence with Viterbi and plot it against the truth
    path, x, y = viterbi(size*number_of_components, a, b, pi)
    plt.figure()
    plt.plot(path[0, :], 'ro')
    plt.plot(path[0, :], 'r')
    plt.plot(Z, 'g')
    plt.show()
    if dimension == 1:
        print("initial means: ", mu[:, 0], "\n",
              "initial variances: ", sigma[:, 0, 0], "\n",
              "initial weights: ", weights, "\n")
        print("means:", means, "\n", "sigmas:", variances, "\n",
              "weights:", pi, "\n")
        print("initial mixing matrix:", matrix, "\n")
        print("mixing matrix:", a, "\n")
    else:
        print("initial means: ", mu, "\n", "initial variances: ", sigma, "\n",
              "initial weights: ", weights, "\n")
        print("means:", means, "\n", "sigmas:", variances, "\n",
              "weights:", pi, "\n")
        print("initial mixing matrix:", matrix, "\n")
        print("mixing matrix:", a, "\n")