def train_hmm(train_set, observations, index_features):
    """Train one unsupervised HMM per category.

    Every word in ``train_set`` is an indexable collection of features, not a
    bare token.  Only the features listed in ``index_features`` are kept
    (feature subset selection); the selected values are packed into a tuple,
    which is the observation symbol handed to the trainer.

    Args:
        train_set: mapping of category -> iterable of sentences.  A sentence
            is an iterable of words and a word must support ``word[feature]``
            for every entry of ``index_features``.
        observations: symbol alphabet, passed unchanged as ``symbols`` to
            ``HiddenMarkovModelTrainer``.  Presumably it has to contain the
            same feature tuples that are built here -- confirm with caller.
        index_features: indices/keys of the features to keep from each word,
            in the order they should appear in the symbol tuple.

    Returns:
        dict mapping each category to the model returned by
        ``train_unsupervised`` for that category's sentences.
    """
    # The state labels carry no meaning of their own, so 1 and 2 are used.
    trainer = HiddenMarkovModelTrainer(states=[1, 2], symbols=observations)
    hmms = {}
    for cat, sentences in train_set.items():
        # Single pre-formatted argument: prints the same text whether
        # ``print`` is the Python 2 statement or the Python 3 function.
        print("Training HMM of cat: %s" % (cat,))
        # Each token becomes a (symbol, tag) pair; the tag is an empty
        # placeholder because the training is unsupervised.
        tuple_sentences = [
            [
                (tuple(word[feature] for feature in index_features), '')
                for word in sentence
            ]
            for sentence in sentences
        ]
        hmms[cat] = trainer.train_unsupervised(tuple_sentences,
                                               max_iterations=10)
    return hmms
# Code example #2
# 0
def train_hmm(train_set, observations, index_features):
    """Train one unsupervised HMM per category.

    Words in ``train_set`` are indexable collections of features rather than
    plain tokens.  For each word only the entries named by ``index_features``
    are retained, and the retained values are turned into a tuple that serves
    as the observation symbol given to the trainer.

    Args:
        train_set: mapping of category -> iterable of sentences, where a
            sentence is an iterable of words and each word supports
            ``word[feature]`` for every entry of ``index_features``.
        observations: symbol alphabet, forwarded as ``symbols`` to
            ``HiddenMarkovModelTrainer``.  Presumably it must hold the same
            feature tuples produced here -- confirm with caller.
        index_features: indices/keys of the features to keep from each word,
            in the order they appear in the resulting symbol tuple.

    Returns:
        dict mapping each category to the model that ``train_unsupervised``
        returned for that category.
    """
    # State labels are arbitrary placeholders, hence simply 1 and 2.
    trainer = HiddenMarkovModelTrainer(states=[1, 2], symbols=observations)
    hmms = {}
    for cat, sentences in train_set.items():
        # One pre-formatted argument keeps the output identical under both
        # the Python 2 print statement and the Python 3 print function.
        print("Training HMM of cat: %s" % (cat,))
        # Tokens are (symbol, tag) pairs; the tag stays empty because the
        # training is unsupervised.
        tuple_sentences = [
            [
                (tuple(word[feature] for feature in index_features), "")
                for word in sentence
            ]
            for sentence in sentences
        ]
        hmms[cat] = trainer.train_unsupervised(tuple_sentences, max_iterations=10)
    return hmms