def train_hmm(train_set, observations, index_features):
    """Train one unsupervised HMM per category.

    Parameters
    ----------
    train_set : dict
        Maps a category name to a list of sentences; each sentence is a
        list of words, and each word is an indexable sequence of features.
    observations : sequence
        Symbol alphabet handed to the trainer; each symbol is a tuple of
        features (nltk requires hashable — i.e. tuple — symbols).
    index_features : iterable of int
        Indices of the features to keep (feature subset selection).

    Returns
    -------
    dict
        Maps each category to its trained HMM
        (result of ``trainer.train_unsupervised``).
    """
    # The hidden states carry no semantics here, so two arbitrary
    # state labels (1 and 2) are used.
    trainer = HiddenMarkovModelTrainer(states=[1, 2], symbols=observations)
    hmms = {}
    for cat in train_set:
        # Parenthesized %-format prints identically under Python 2 and 3.
        print("Training HMM of cat: %s" % cat)
        # Feature subset selection: keep only the features named by
        # index_features, then pair each reduced word with an empty tag,
        # since the trainer expects (symbol, tag) tuples even for
        # unsupervised training.
        tuple_sentences = [
            [(tuple(word[f] for f in index_features), '') for word in sentence]
            for sentence in train_set[cat]
        ]
        hmms[cat] = trainer.train_unsupervised(tuple_sentences,
                                               max_iterations=10)
    return hmms
def train_hmm(train_set, observations, index_features):
    """Build an unsupervised HMM for every category in *train_set*.

    Each sentence is reduced to the feature indices listed in
    ``index_features``; every reduced word becomes a (tuple, "") pair,
    the (symbol, tag) shape the nltk trainer expects. The state labels
    ``[1, 2]`` are arbitrary placeholders — the states themselves carry
    no meaning here. Returns a dict mapping category -> trained HMM.
    """
    trainer = HiddenMarkovModelTrainer(states=[1, 2], symbols=observations)
    hmms = {}
    for category in train_set.keys():
        # Parenthesized %-format prints the same text on Python 2 and 3.
        print("Training HMM of cat: %s" % category)
        corpus = []
        for sentence in train_set[category]:
            # Feature subset selection: project each word onto the chosen
            # feature indices, then attach an empty tag for the trainer.
            reduced = [tuple(word[idx] for idx in index_features)
                       for word in sentence]
            corpus.append([(symbol, '') for symbol in reduced])
        hmms[category] = trainer.train_unsupervised(corpus, max_iterations=10)
    return hmms