from sklearn.hmm import GMMHMM  # or: from hmmlearn.hmm import GMMHMM

def trainModel(training_sequences, num_states, num_comp, covariance_type):
    """
    @return (GMMHMM): A GMM HMM model with @param num_states hidden states,
        @param num_comp GMM components per hidden state, and a covariance
        matrix of type @param covariance_type ('full' or 'diag'), trained
        on @param training_sequences.
    """
    model = GMMHMM(num_states, n_mix=num_comp, covariance_type=covariance_type)
    return model.fit(training_sequences)
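# A minimal usage sketch for trainModel; the synthetic sequences and the
# parameter values below are illustrative only.
import numpy as np

training_sequences = [np.random.randn(100, 13), np.random.randn(80, 13)]
model = trainModel(training_sequences, num_states=3, num_comp=2,
                   covariance_type='diag')
print model.score(training_sequences[0])  # per-sequence log-likelihood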
# This __init__ forwards every hyper-parameter straight to the parent
# GMMHMM; the enclosing subclass is not shown in the snippet, so the
# class name below is hypothetical.
from sklearn.hmm import GMMHMM

class WrappedGMMHMM(GMMHMM):

    def __init__(self, n_components=1, n_mix=1, startprob=None,
                 transmat=None, startprob_prior=None, transmat_prior=None,
                 algorithm="viterbi", gmms=None, covariance_type='diag',
                 covars_prior=1e-2, random_state=None, n_iter=10,
                 thresh=1e-2, params="", init_params=""):
        GMMHMM.__init__(self, n_components, n_mix, startprob, transmat,
                        startprob_prior, transmat_prior, algorithm, gmms,
                        covariance_type, covars_prior, random_state,
                        n_iter, thresh, params, init_params)
# `pcas`, `num_mixc`, `cov_type` and the `hmm` helper module are defined
# elsewhere in the original project.
import numpy as np
from sklearn.mixture import GMM
from sklearn.hmm import GMMHMM

def train(trainingData):
    # train one GMM for each state (ratings 1..5)
    mixes = list()
    for state in xrange(1, 6):
        # select data with current state label
        d = trainingData[trainingData.rating == state]
        # prepare data shape: one row per sample, one column per PCA feature
        d = np.array(zip(*[d[f].values for f in pcas]))
        # init and train GMM
        gmm = GMM(num_mixc, cov_type)
        gmm.fit(d)
        mixes.append(gmm)
    # maximum-likelihood start/transition probabilities from the labels
    init, trans = hmm.hmmMlParams(trainingData, [1, 2, 3, 4, 5])
    # init_params='' keeps the supplied parameters instead of re-initialising
    model = GMMHMM(n_components=5, init_params='', gmms=mixes)
    model.transmat_ = trans
    model.startprob_ = init
    return model
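# hmm.hmmMlParams is a project helper not shown here. A plausible minimal
# stand-in, assuming it counts labels in the rating column: start
# probabilities from empirical state frequencies, transition probabilities
# from normalised bigram counts.
import numpy as np

def hmm_ml_params_sketch(trainingData, states):
    n = len(states)
    index = {s: i for i, s in enumerate(states)}
    ratings = trainingData.rating.values
    # start probabilities ~ empirical state frequencies
    init = np.array([np.mean(ratings == s) for s in states])
    # transition probabilities ~ normalised bigram counts
    trans = np.zeros((n, n))
    for a, b in zip(ratings[:-1], ratings[1:]):
        trans[index[a], index[b]] += 1
    trans /= np.maximum(trans.sum(axis=1, keepdims=True), 1)
    return init, trans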
import numpy as np
from sklearn.hmm import GaussianHMM, GMMHMM  # or hmmlearn.hmm in newer code

class NaiveHMM(object):

    def __init__(self, num_states, hmm_type=0, n_mix=3, n_iter=100,
                 n_components=3):
        # note: num_states is unused; n_mix doubles as the number of
        # hidden states in the GaussianHMM branch
        if hmm_type == 0:
            self._hmm = GaussianHMM(n_mix, n_iter=n_iter)
        elif hmm_type == 1:
            # WARNING: VERY VERY VERY SLOW
            self._hmm = GMMHMM(n_mix=n_mix, n_components=n_components,
                               n_iter=n_iter)

    def fit(self, data):
        """
        @param data DataFrame object containing T,X,Y,Z values.
        """
        train = np.column_stack([data.X.tolist(), data.Y.tolist(),
                                 data.Z.tolist()])
        self._hmm.fit([train])

    def score(self, test):
        """
        @param test DataFrame object containing T,X,Y,Z values.
        """
        t = np.column_stack([test.X.tolist(), test.Y.tolist(),
                             test.Z.tolist()])
        return self._hmm.score(t)

    def get_hmm(self):
        return self._hmm
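# A minimal usage sketch for NaiveHMM; the DataFrame below uses random
# values purely for illustration.
import numpy as np
import pandas as pd

data = pd.DataFrame({'T': np.arange(200),
                     'X': np.random.randn(200),
                     'Y': np.random.randn(200),
                     'Z': np.random.randn(200)})

model = NaiveHMM(num_states=3, hmm_type=0)  # GaussianHMM variant
model.fit(data)
print model.score(data)  # log-likelihood of the training trace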
# Annotation, Segment, Unknown and LBG come from the surrounding project
# and are not shown here.
import itertools
import logging

import numpy as np
from scipy.ndimage import median_filter
from sklearn.hmm import GMMHMM

class SegmentationHMM(object):
    """HMM-based segmentation with Viterbi decoding

    Parameters
    ----------
    n_components : int
        Number of gaussians per HMM state (default is 1).
    covariance_type : {'diag', 'full'}
        Type of gaussian covariance matrices
    sampling : int, optional
        Reduce the number of samples used for the initialization steps
        to `sampling` samples per component. A few hundred samples per
        component should be a reasonable rule of thumb.
        The final estimation steps always use the whole sample set.
    min_duration : float, optional
        Filter out segments shorter than `min_duration` seconds
    n_jobs : int
        Number of parallel jobs for GMM estimation (default is one core)
    """

    def __init__(self, n_components=1, covariance_type='diag',
                 sampling=0, min_duration=None, n_jobs=1):
        super(SegmentationHMM, self).__init__()
        self.n_components = n_components
        self.covariance_type = covariance_type
        self.sampling = sampling
        self.n_jobs = n_jobs
        self.min_duration = min_duration
        self.gmm = {}

    def _get_targets(self, reference):
        """Get list of targets from training data

        Parameters
        ----------
        reference : `Annotation` iterable

        Returns
        -------
        targets : list
            Sorted list of 'known' targets
        """
        # empty target set
        targets = set()
        for annotation in reference:
            labels = [L for L in annotation.labels()
                      if not isinstance(L, Unknown)]
            targets.update(labels)
        return sorted(targets)

    def _get_gmm(self, reference, features, target):
        # gather target data (use target regions only)
        data = np.vstack([
            f.crop(r.label_coverage(target))
            for r, f in itertools.izip(reference, features)
        ])
        lbg = LBG(n_components=self.n_components,
                  covariance_type=self.covariance_type,
                  sampling=self.sampling, n_iter=10, disturb=0.05)
        gmm = lbg.apply(data)
        return gmm

    def fit(self, reference, features):
        """Train HMM segmentation

        The resulting HMM will contain one state per label in the
        training set.

        Parameters
        ----------
        reference : `Annotation` generator
            Generates annotations whose labels will be HMM states
        features : `Feature` generator
            Generates features synchronized with `reference`
        """
        # gather training data
        reference = list(reference)
        features = list(features)

        # gather target list
        self.targets = self._get_targets(reference)

        # train one GMM per state
        for target in self.targets:
            logging.info('training {%s} GMM' % str(target))
            self.gmm[target] = self._get_gmm(reference, features, target)

        # train HMM; only start and transition probabilities are estimated,
        # the GMM emissions are kept fixed (params='st')
        logging.info('training %d-states HMM' % len(self.targets))
        self.hmm = GMMHMM(
            n_components=len(self.targets),
            gmms=[self.gmm[target] for target in self.targets],
            init_params='st', params='st'
        )
        self.hmm.fit([f.data for f in features])

        return self

    def apply(self, features):
        """
        Parameters
        ----------
        features : SlidingWindowFeatures
        """
        # predict state sequence
        sequence = self.hmm.predict(features.data)

        # median filtering to get rid of short segments
        if self.min_duration:
            if len(self.targets) > 2:
                raise NotImplementedError(
                    'min_duration is not supported with more than 2 states.'
                )
            dummy = Segment(0, self.min_duration)
            _, n = features.sliding_window.segmentToRange(dummy)
            sequence = median_filter(sequence, size=2 * n + 1)

        # start initial segment
        start = 0
        label = self.targets[sequence[0]]

        segmentation = Annotation()
        for i, d in enumerate(np.diff(sequence)):
            if d == 0:
                continue
            # end of current segment
            end = i
            segment = features.sliding_window.rangeToSegment(start, end - start)
            segmentation[segment, '_'] = label
            # start of a new segment
            label = self.targets[sequence[i + 1]]
            start = end

        # close the final segment up to the end of the feature extent
        segment = Segment(segment.end, features.getExtent().end)
        segmentation[segment, '_'] = label

        return segmentation
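# The class above depends on pyannote-style Annotation/Feature objects.
# A self-contained sketch of its core idea -- fixed, pre-trained GMM
# emissions, start/transition probabilities re-estimated with params='st',
# then median smoothing of the decoded state sequence -- on synthetic
# 1-D data (old sklearn.hmm API assumed):
import numpy as np
from scipy.ndimage import median_filter
from sklearn.hmm import GMMHMM
from sklearn.mixture import GMM

# one pre-trained emission GMM per state
gmms = []
for mean in (0.0, 5.0):
    g = GMM(n_components=2)
    g.fit(mean + np.random.randn(500, 1))
    gmms.append(g)

# two-state sequence: 150 frames around 0, then 150 frames around 5
sequence_data = np.vstack([np.random.randn(150, 1),
                           5 + np.random.randn(150, 1)])

hmm = GMMHMM(n_components=2, gmms=gmms, init_params='st', params='st')
hmm.fit([sequence_data])  # only start/transition probabilities move

states = hmm.predict(sequence_data)
states = median_filter(states, size=11)  # suppress very short segments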
GMMs = []
for state_index in range(n_states):
    g = GMM(
        n_components=m_components,
        covariance_type=covariance_type,
        # n_iter=0 would initialise the GMMs without optimising them through
        # EM; that optimisation is done later in the GMM HMM model
    )
    # n_iter = 5 (...) better??
    g.fit(observations_per_state[state_index])
    GMMs.append(g)

# Initialise and train new GMM HMM model
print "\tTraining GMM HMM model..."
# Implements (6.), (7.)
gmm_model = GMMHMM(
    n_components=n_states,
    n_mix=m_components,
    startprob=previous_model.startprob_,
    transmat=previous_model.transmat_,
    gmms=GMMs,
    covariance_type=covariance_type,
    n_iter=num_EM_iterations,
    init_params=''  # initialisation through previous model and GMMs!
)
gmm_model.fit(training_sequences)

# save base model
print "\tSaving model to file..."
saveModel(gmm_model, 'gmm_model_%scomponents' % m_components,
          observation_sequences[0].getFeatureNames())

# tag training data using new model
print "\tTagging training data using base model..."
likelihood_of_training_data, observations_per_state = tagTrainingData(
    gmm_model,
    training_sequences,
    list(observation_sequences),  # pass a copy
    save='gmm_model_%scomponents/tagged_training_data' % m_components,
    filenames=filenames
)
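# saveModel and tagTrainingData are project helpers not shown here. A
# plausible minimal stand-in for tagTrainingData, assuming it Viterbi-
# decodes each sequence and regroups frames by predicted state (the
# save/filenames bookkeeping is omitted):
import numpy as np

def tag_training_data_sketch(model, training_sequences, n_states):
    per_state = [[] for _ in range(n_states)]
    total_loglik = 0.0
    for seq in training_sequences:
        loglik, states = model.decode(seq)  # Viterbi log-likelihood + path
        total_loglik += loglik
        for frame, state in zip(seq, states):
            per_state[state].append(frame)
    dim = training_sequences[0].shape[1]
    per_state = [np.vstack(f) if f else np.empty((0, dim))
                 for f in per_state]
    return total_loglik, per_state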
import glob
import os

import pandas as pd
from sklearn.externals import joblib
from sklearn.hmm import GMMHMM


def apply_pca(dataset):
    pcaModel = joblib.load('./pca2.pkl')
    return pcaModel.transform(dataset)


gesture = 'Sorry'
os.chdir('./' + gesture)
file_list = [i for i in glob.glob('*.csv')]
os.chdir('../')
print file_list

lengths = []  # only needed for hmmlearn's fit(X, lengths) API
X = []
for file_name in file_list:
    print file_name
    dataset = pd.read_csv(gesture + '/' + file_name)
    dataset = apply_pca(dataset.values)
    lengths.append(len(dataset))
    X.append(dataset)
# print X

model = GMMHMM(n_components=7, n_mix=128, covariance_type="diag")
model.fit(X)  # old sklearn.hmm API: list of sequences
joblib.dump(model, './HMMs/' + gesture + '.pkl')
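# A minimal recognition sketch, assuming one pickled model per gesture in
# ./HMMs/ as produced above; the gesture list and test file name are
# hypothetical.
from sklearn.externals import joblib

gestures = ['Sorry', 'Hello']  # hypothetical gesture inventory
sample = apply_pca(pd.read_csv('Sorry/test.csv').values)

scores = {}
for g in gestures:
    model = joblib.load('./HMMs/' + g + '.pkl')
    scores[g] = model.score(sample)  # log-likelihood under gesture model

print max(scores, key=scores.get)  # most likely gesture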