def test_score_samples_and_decode(self):
    h = hmm.GaussianHMM(self.n_components, self.covariance_type)
    h.means_ = self.means
    h.covars_ = self.covars[self.covariance_type]

    # Make sure the means are far apart so posteriors.argmax()
    # picks the actual component used to generate the observations.
    h.means_ = 20 * h.means_

    gaussidx = np.repeat(np.arange(self.n_components), 5)
    nobs = len(gaussidx)
    obs = self.prng.randn(nobs, self.n_features) + h.means_[gaussidx]

    ll, posteriors = h.score_samples(obs)
    self.assertEqual(posteriors.shape, (nobs, self.n_components))
    assert_array_almost_equal(posteriors.sum(axis=1), np.ones(nobs))

    viterbi_ll, stateseq = h.decode(obs)
    assert_array_equal(stateseq, gaussidx)
def test_fit_with_priors(self, params='stmc', n_iter=10, verbose=False):
    startprob_prior = 10 * self.startprob + 2.0
    transmat_prior = 10 * self.transmat + 2.0
    means_prior = self.means
    means_weight = 2.0
    covars_weight = 2.0
    if self.covariance_type in ('full', 'tied'):
        covars_weight += self.n_features
    covars_prior = self.covars[self.covariance_type]

    h = hmm.GaussianHMM(self.n_components, self.covariance_type)
    h.startprob_ = self.startprob
    h.startprob_prior = startprob_prior
    h.transmat_ = hmm.normalize(
        self.transmat + np.diag(self.prng.rand(self.n_components)), 1)
    h.transmat_prior = transmat_prior
    h.means_ = 20 * self.means
    h.means_prior = means_prior
    h.means_weight = means_weight
    h.covars_ = self.covars[self.covariance_type]
    h.covars_prior = covars_prior
    h.covars_weight = covars_weight

    # Create training data by sampling from the HMM.
    train_obs = [h.sample(n=10)[0] for x in xrange(10)]

    # Mess up the parameters and see if we can re-learn them.
    h.fit(train_obs[:1], n_iter=0)
    trainll = train_hmm_and_keep_track_of_log_likelihood(
        h, train_obs, n_iter=n_iter, params=params)[1:]

    # Check that the log-likelihood is always increasing during training.
    if not np.all(np.diff(trainll) > 0) and verbose:
        print('Test MAP train: %s (%s)\n %s\n %s'
              % (self.covariance_type, params, trainll, np.diff(trainll)))
    # XXX: Why such a large tolerance?
    self.assertTrue(np.all(np.diff(trainll) > -0.5))
def make_model_metric_pickle(state1_min_mean, state1_max_mean, state1_min_cov,
                             state1_max_cov, num_iter, num_test_houses):
    devices_types = get_test_data(num_test_houses)
    errors_mean_cov = []
    error_dict = {}
    device_type_name = 'air1'
    state1_means = np.linspace(state1_min_mean, state1_max_mean, num_iter)
    state1_covs = np.linspace(state1_min_cov, state1_max_cov, num_iter)
    best_error = float('inf')
    for state1_mean in state1_means:
        errors_cov = []
        for state1_cov in state1_covs:
            print str(state1_mean) + ", " + str(state1_cov)
            pi_prior = np.array([0.9, 0.1])
            a_prior = np.array([[0.95, 0.05], [0.05, 0.95]])
            mean_prior = np.array([[0], [state1_mean]])
            cov_prior = np.array([[[0.0001]], [[state1_cov]]])
            model = hmm.GaussianHMM(pi_prior.size, 'full', pi_prior, a_prior)
            model.means_ = mean_prior
            model.covars_ = cov_prior
            error = get_model_error_from_trace0(
                devices_types[device_type_name].instances, model)
            if error < best_error:
                best_mean = state1_mean
                best_cov = state1_cov
                best_error = error
            errors_cov.append(error)
        errors_mean_cov.append(errors_cov)
    error_dict['error_vals'] = errors_mean_cov
    error_dict['cov_vals'] = state1_covs
    error_dict['mean_vals'] = state1_means
    error_dict['best_mean'] = best_mean
    error_dict['best_cov'] = best_cov
    error_dict['best_error'] = best_error
    fname = ('error_m_' + str(state1_min_mean) + '_' + str(state1_max_mean)
             + '_c_' + str(state1_min_cov) + '_' + str(state1_max_cov) + '.pkl')
    with open(fname, 'wb') as f:  # binary mode for pickle
        pickle.dump(error_dict, f)
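# A hedged usage sketch for the grid search above. get_test_data and
# get_model_error_from_trace0 are project helpers assumed to be in scope
# (with their test data available), and the grid bounds below are
# illustrative, not values from the original. This sweeps the "on" state's
# mean over 1000-2000 W and its covariance over 1-100 on a 10x10 grid,
# scoring against 5 test houses, and writes error_m_1000_2000_c_1_100.pkl
# to the working directory.
make_model_metric_pickle(state1_min_mean=1000, state1_max_mean=2000,
                         state1_min_cov=1, state1_max_cov=100,
                         num_iter=10, num_test_houses=5)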
def init_HMM(pi_prior, a_prior, mean_prior, cov_prior):
    '''Initialize a Gaussian HMM from prior parameters.

    pi_prior is the starting probability of the HMM,
    a_prior is the transition matrix of the HMM,
    mean_prior is the initial mean value of each state, and
    cov_prior is the initial covariance of each state.

    For an n-state HMM:
    * pi_prior is a 1-D numpy array of size n
    * a_prior is a 2-D numpy array of size n x n
    * mean_prior is a numpy array of size n
    * cov_prior is a 3-D numpy array holding one covariance matrix per
      state, e.g. np.tile(1, (n, 1, 1)) for uniform covariance to start with.
    '''
    model = hmm.GaussianHMM(pi_prior.size, 'full', pi_prior, a_prior)
    model.means_ = mean_prior
    model.covars_ = cov_prior
    return model
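# A minimal usage sketch for init_HMM, assuming the old sklearn.hmm module
# that these snippets target. The two-state "off"/"on" priors below are
# illustrative; they mirror the fake A/C parameters used elsewhere here.
import numpy as np
from sklearn import hmm  # deprecated module, removed after sklearn 0.16

pi_prior = np.array([0.1, 0.9])
a_prior = np.array([[0.95, 0.05],
                    [0.05, 0.95]])
mean_prior = np.array([[0.0], [1500.0]])   # one mean per state (1-D observations)
cov_prior = np.tile(1.0, (2, 1, 1))        # one 1x1 covariance matrix per state

model = init_HMM(pi_prior, a_prior, mean_prior, cov_prior)
obs, states = model.sample(96)             # sample a short synthetic sequence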
# The transition matrix; note that there are no transitions possible
# between component 1 and 3
trans_mat = np.array([[0.7, 0.2, 0.0, 0.1],
                      [0.3, 0.5, 0.2, 0.0],
                      [0.0, 0.3, 0.5, 0.2],
                      [0.2, 0.0, 0.2, 0.6]])

# The means of each component
means = np.array([[0.0, 0.0],
                  [0.0, 11.0],
                  [9.0, 10.0],
                  [11.0, -1.0]])

# The covariance of each component
covars = .5 * np.tile(np.identity(2), (4, 1, 1))

# The starting probabilities (assumed here; not shown in the original excerpt)
start_prob = np.array([0.6, 0.3, 0.1, 0.0])

# Build an HMM instance and set parameters
model = hmm.GaussianHMM(4, "full", start_prob, trans_mat, random_state=42)

# Instead of fitting it from the data, we directly set the estimated
# parameters: the means and covariances of the components
model.means_ = means
model.covars_ = covars

###############################################################
# Generate samples
X, Z = model.sample(500)

# Plot the sampled data
plt.plot(X[:, 0], X[:, 1], "-o", label="observations")
def test_bad_covariance_type(self):
    hmm.GaussianHMM(20, self.covariance_type)
    self.assertRaises(ValueError, hmm.GaussianHMM, 20,
                      'badcovariance_type')
# Footprint color bed output
if hmmType == "8":
    stateNameDict = dict([(0, "BACK"), (1, "UPH"), (2, "TOPH"), (3, "DOWNH"),
                          (4, "UPD"), (5, "TOPD"), (6, "DOWND"), (7, "FP")])
    colorDict = dict([(0, "50,50,50"), (1, "110,250,110"), (2, "90,180,240"),
                      (3, "255,80,90"), (4, "10,80,0"), (5, "20,40,150"),
                      (6, "150,20,40"), (7, "198,150,0")])
elif hmmType == "4":
    stateNameDict = dict([(0, "BACK"), (1, "HH"), (2, "DH"), (3, "FP")])
    colorDict = dict([(0, "50,50,50"), (1, "90,180,240"), (2, "10,80,0"),
                      (3, "198,150,0")])

##################################################
### Applying HMM and creating posteriorList
##################################################

# Creating hmm
hmmStates, dimNo, startprob, transmat, means, covars = \
    hmmFunctions.createHMM(hmmFileName, returnMode="sci")
if covarType == "diag":
    covars = aux.diagonalize(covars)
hmm = shmm.GaussianHMM(n_components=hmmStates, covariance_type=covarType,
                       transmat=np.array(transmat),
                       startprob=np.array(startprob))
hmm.means_ = np.array(means)
hmm.covars_ = np.array(covars)

# Opening signals
signalFileList = []
signalBwList = []
for signalFileName in signalList:
    signalFileList.append(open(signalFileName, "r"))
    signalBwList.append(BigWigFile(signalFileList[-1]))

# Create output files list
coordName = coordFileName.split("/")[-1].split(".")[0]
outputBedFile = open(outputFileName, "w")

# Iterating on coordinate file
coordFile = open(coordFileName, "r")
for line in coordFile:
def main():
    #### Data ####
    # fake A/C params
    pi = np.array([0.1, 0.9])
    a = np.array([[0.95, 0.05], [0.05, 0.95]])
    mean = np.array([[0], [1500]])
    cov = np.array([[[1.]], [[10]]])
    model = hmm.GaussianHMM(pi.size, "full", pi, a)
    model.means_ = mean
    model.covars_ = cov

    # randomly sample one day of data (15-minute intervals)
    length = 4 * 24
    power, state = model.sample(length)

    #### CNN ####
    learning_rate = 0.1
    rng = np.random.RandomState(23455)

    ishape = (length, 1)  # this is the size of our input data
    batch_size = 20       # size of the minibatch

    # allocate symbolic variables for the data
    index = T.lscalar('index')  # minibatch index, used by train_model below
    x = T.matrix('x')   # generated power levels
    y = T.lvector('y')  # generated states

    ##############################
    # BEGIN BUILDING ACTUAL MODEL
    ##############################

    # Reshape matrix of rasterized images of shape (batch_size, length, 1)
    # to a 4D tensor, compatible with our ConvPoolLayer
    layer0_input = x.reshape((batch_size, 1, length, 1))

    # Construct the first convolutional pooling layer:
    # filtering reduces the input size to (96-3+1, 1-1+1) = (94, 1)
    # maxpooling reduces this further to (94/2, 1/1) = (47, 1)
    # 4D output tensor is thus of shape (20, 20, 47, 1)
    layer0 = LeNetConvPoolLayer(rng, input=layer0_input,
                                image_shape=(batch_size, 1, length, 1),
                                filter_shape=(20, 1, 3, 1),
                                poolsize=(2, 1))  # (2, 1) matches the 94/2 = 47
                                                  # arithmetic and n_in below

    # Construct the second convolutional pooling layer
    # filtering reduces the image size to (12-5+1, 12-5+1) = (8, 8)
    # maxpooling reduces this further to (8/2, 8/2) = (4, 4)
    # 4D output tensor is thus of shape (20, 50, 4, 4)
    #layer1 = LeNetConvPoolLayer(rng, input=layer0.output,
    #                            image_shape=(batch_size, 20, 12, 12),
    #                            filter_shape=(50, 20, 5, 5), poolsize=(2, 2))

    # the HiddenLayer being fully-connected, it operates on 2D matrices of
    # shape (batch_size, num_pixels) (i.e. matrix of rasterized images).
    # This will generate a matrix of shape (20, 20 * 47 * 1)
    layer2_input = layer0.output.flatten(2)

    # construct a fully-connected sigmoidal layer
    layer2 = HiddenLayer(rng, input=layer2_input, n_in=20 * 47 * 1,
                         n_out=50, activation=T.tanh)

    # classify the values of the fully-connected sigmoidal layer
    layer3 = LogisticRegression(input=layer2.output, n_in=50, n_out=1)

    # the cost we minimize during training is the NLL of the model
    cost = layer3.negative_log_likelihood(y)

    # create a function to compute the mistakes that are made by the model
    test_model = theano.function([x, y], layer3.errors(y))

    # create a list of all model parameters to be fit by gradient descent
    # (layer1 is commented out above, so its params are excluded)
    params = layer3.params + layer2.params + layer0.params

    # create a list of gradients for all model parameters
    grads = T.grad(cost, params)

    # train_model is a function that updates the model parameters by SGD.
    # Since this model has many parameters, it would be tedious to manually
    # create an update rule for each model parameter, so we build the updates
    # list by looping over all (params[i], grads[i]) pairs.
    updates = []
    for param_i, grad_i in zip(params, grads):
        updates.append((param_i, param_i - learning_rate * grad_i))

    # train_set_x / train_set_y are assumed to be Theano shared variables
    # holding the sampled data; they are not defined in this excerpt.
    train_model = theano.function(
        [index], cost, updates=updates,
        givens={
            x: train_set_x[index * batch_size: (index + 1) * batch_size],
            y: train_set_y[index * batch_size: (index + 1) * batch_size]})
import numpy as np
from sklearn import hmm

startprob = np.array([0.6, 0.3, 0.1])
transmat = np.array([[0.7, 0.2, 0.1],
                     [0.3, 0.5, 0.2],
                     [0.3, 0.3, 0.4]])
means = np.array([[0.0, 0.0],
                  [3.0, -3.0],
                  [5.0, 10.0]])
covars = np.tile(np.identity(2), (3, 1, 1))

model = hmm.GaussianHMM(3, "full", startprob, transmat)
model.means_ = means
model.covars_ = covars

X, Z = model.sample(100)
print X
model.fit([X])
def stkHMM(lrndata, n_components):
    model = lrn.GaussianHMM(n_components, covariance_type="tied", n_iter=20)
    model.fit([lrndata])
    hidden_states = model.predict(lrndata)
    return [model, hidden_states]
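# A minimal sketch of calling stkHMM, assuming lrn is the old sklearn.hmm
# module; the synthetic two-feature sequence below is illustrative.
import numpy as np

rng = np.random.RandomState(0)
lrndata = np.column_stack([rng.randn(200),
                           rng.randn(200) + 5.0])   # 200 steps, 2 features
model, hidden_states = stkHMM(lrndata, n_components=3)
# hidden_states[t] is the decoded state for observation t.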
def createHMM(start_prob, trans_mat, means, covars):
    model = hmm.GaussianHMM(4, 'full', start_prob, trans_mat, random_state=42)
    model.means_ = means
    model.covars_ = covars
    return model, model.means_, model.covars_
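# Usage sketch for createHMM, reusing the four-component sampling
# parameters (start_prob, trans_mat, means, covars) from the sampling
# example above; createHMM simply packages them into a model.
model, means_, covars_ = createHMM(start_prob, trans_mat, means, covars)
X, Z = model.sample(500)   # observations and the hidden states that produced them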
def fake_hmm_appliance(pi, a, mean, cov):
    model = hmm.GaussianHMM(pi.size, "full", pi, a)
    model.means_ = mean
    model.covars_ = cov
    return model
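# Usage sketch for fake_hmm_appliance, with the fake A/C parameters from
# main() above: an "off" state near 0 W and an "on" state near 1500 W.
import numpy as np

pi = np.array([0.1, 0.9])
a = np.array([[0.95, 0.05], [0.05, 0.95]])
mean = np.array([[0], [1500]])
cov = np.array([[[1.]], [[10.]]])

ac_model = fake_hmm_appliance(pi, a, mean, cov)
power, state = ac_model.sample(4 * 24)   # one simulated day at 15-minute intervals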
def main():
    if dimension == 1:
        # gmm = np.zeros(number_of_components*size)
        # mu = np.zeros(number_of_components)
        # sigma = np.zeros(number_of_components)
        # for i in range(number_of_components):
        #     gmm[i*size:(i+1)*size], mu[i], sigma[i] = create_data(dimension, size, i)
        gmm = np.zeros((1, number_of_components * size), dtype=float)
        mu = np.zeros((number_of_components, 1), dtype=float)
        sigma = np.zeros((number_of_components, 1, 1), dtype=float)
        matrix = np.zeros((number_of_components, number_of_components), dtype=float)
        # for i in range(number_of_components):
        #     x, mu[i,0], sigma[i,0,0] = create_data(dimension, size, i)
    else:
        gmm = np.zeros((dimension, number_of_components * size), dtype=float)
        mu = np.zeros((number_of_components, dimension), dtype=float)
        sigma = np.zeros((number_of_components, dimension, dimension), dtype=float)
        matrix = np.zeros((number_of_components, number_of_components), dtype=float)
        # for i in range(number_of_components):
        #     x, mu[i,:], sigma[i,:,:] = create_data(dimension, size, i)

    weights = np.array([0.6, 0.4])
    matrix = np.array([[0.7, 0.3],
                       [0.1, 0.9]])
    model = hmm.GaussianHMM(2, "full", weights, matrix)
    model.means_ = mu
    model.covars_ = sigma
    gmm, Z = model.sample(number_of_components * size)
    # else:
    #     gmm = np.zeros((dimension, number_of_components*size))
    #     mu = np.zeros((number_of_components, dimension))
    #     sigma = np.zeros((number_of_components, dimension, dimension))
    #     for i in range(number_of_components):
    #         gmm[:, i*size:(i+1)*size], mu[i,:], sigma[i,:,:] = create_data(dimension, size, i)

    means, variances, pi, a = emHMM_algorithm(
        gmm, dimension, number_of_components, number_of_components * size)

    # num_bins = 50
    # n, bins, patches = plt.hist(gmm, num_bins, normed=1, facecolor='green', alpha=0.5)
    # # add a 'best fit' line
    # for i in range(number_of_components):
    #     y = mlab.normpdf(bins, means[i], variances[i])
    #     plt.plot(bins, y, 'r--')
    # plt.xlabel('Values')
    # plt.ylabel('Probability')
    # plt.title('Data Histogram vs predicted distribution')
    # # Tweak spacing to prevent clipping of ylabel
    # plt.subplots_adjust(left=0.15)
    # plt.show()

    b = np.zeros((number_of_components, number_of_components * size))

    # Evaluate posterior
    if dimension == 1:
        for i in range(number_of_components):
            # Calculate the probability of seeing the observation given each state
            pdf = pi[i] * mlab.normpdf(gmm, means[i], variances[i, 0])
            b[i, :] = pdf[:, 0]
    else:
        centered_data = np.zeros((number_of_components, number_of_components * size, dimension))
        den = np.zeros((number_of_components, number_of_components * size))
        num = np.zeros((number_of_components, number_of_components * size))
        for i in range(number_of_components):
            # Calculate the probability of seeing the observation given each state
            for n in range(number_of_components * size):
                centered_data[i, n, :] = gmm[n, :] - means[i, :]
                den[i, n] = np.sqrt((2 * math.pi) ** dimension
                                    * np.linalg.det(variances[i, :, :]))
                num[i, n] = np.exp((-0.5) * np.dot(
                    np.dot(centered_data[i, n, :][np.newaxis],
                           np.linalg.inv(variances[i, :, :])),
                    centered_data[i, n, :][:, np.newaxis]))
                b[i, n] = num[i, n] / den[i, n]

    # Predict
    path, x, y = viterbi(size * number_of_components, a, b, pi)

    plt.figure()
    plt.plot(path[0, :], 'ro')
    plt.plot(path[0, :], 'r')
    plt.plot(Z, 'g')
    plt.show()

    if dimension == 1:
        print "initial means: ", mu[:, 0], "\n", "initial variances: ", sigma[:, 0, 0], "\n", "initial weights: ", weights, "\n"
        print "means:", means, "\n", "sigmas:", variances, "\n", "weights:", pi, "\n"
        print "initial mixing matrix:", matrix, "\n"
        print "mixing matrix:", a, "\n"
    else:
        print "initial means: ", mu, "\n", "initial variances: ", sigma, "\n", "initial weights: ", weights, "\n"
        print "means:", means, "\n", "sigmas:", variances, "\n", "weights:", pi, "\n"
        print "initial mixing matrix:", matrix, "\n"
        print "mixing matrix:", a, "\n"