import re

import viterbi


def predict(self, word):
    # Score each position of `word` against every label, Viterbi-decode the
    # best label sequence, then join the labels (dropping '_' placeholders).
    X = self.vec.transform(word)
    scores = X.dot(self.coef_.T).toarray()
    y = viterbi.decode(scores, self.intercept_trans_,
                       self.intercept_init_, self.intercept_final_)
    y = [self.classes_[pred] for pred in y]
    return re.sub('_', '', ''.join(y))
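# The project's viterbi module is not shown in this section. As a rough,
# hypothetical sketch of what a viterbi.decode() matching the call above could
# look like, the NumPy version below assumes `scores` is a (T, K) array of
# per-position label scores, trans[i, j] is the score of moving from label i
# to label j, and init/final are per-label scores for the first and last
# positions; the repository's actual implementation may differ.
import numpy as np


def decode(scores, trans, init, final):
    """Best-scoring label sequence for a (T, K) score matrix (Viterbi)."""
    T, K = scores.shape
    delta = np.empty((T, K))               # best score of a path ending in each label
    backptr = np.zeros((T, K), dtype=int)  # predecessor label along that path

    delta[0] = init + scores[0]
    for t in range(1, T):
        # cand[i, j] = best score of a path ending with the step i -> j at time t
        cand = delta[t - 1][:, None] + trans + scores[t][None, :]
        backptr[t] = cand.argmax(axis=0)
        delta[t] = cand.max(axis=0)
    delta[-1] += final

    # Follow the back-pointers from the best final label.
    path = [int(delta[-1].argmax())]
    for t in range(T - 1, 0, -1):
        path.append(int(backptr[t, path[-1]]))
    return path[::-1]
# First-order Viterbi decoding like this costs O(T * K**2) time.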
def decode_sentence(s, model):
    return decode(s, model)
import random

from corpus import get_data  # , get_lexicon
from hmm import get_HMM
from viterbi import decode

# Quick smoke test: train an HMM on the first 200 sentences, then decode a
# randomly chosen training sentence.
# train_set = [random.choice(sents) for i in range(0, 200)]
train_set = get_data("corpus.xml")[:200]
model = get_HMM(train_set)

s = random.choice(train_set)
# s = sents[4023]
print("phrase: ", s["num"])
s["decoded"] = decode(s, model)

# forget_words (the set of tokens treated as *UNK* during training) is assumed
# to be defined or imported elsewhere.
for i in range(0, s["len"]):
    if s["tokens"][i] in forget_words:
        print("*UNK*\t", s["tokens"][i], "\t", s["tags"][i], "\t", s["decoded"][i])
    else:
        print(" \t", s["tokens"][i], "\t", s["tags"][i], "\t", s["decoded"][i])
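# For reference, a hypothetical example of the sentence dicts produced by
# get_data(), reconstructed from the fields accessed above (all values are
# invented purely for illustration):
example_sentence = {
    "num": 42,                           # sentence index in the corpus
    "len": 3,                            # number of tokens
    "tokens": ["the", "cat", "sleeps"],  # observed words
    "tags": ["DET", "NOUN", "VERB"],     # reference tags
    # "decoded" is filled in by the script after Viterbi decoding
}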
def get_trans_probs(dict_trans, num_states):
    # Build the row-normalised state-transition matrix.
    trans_probs = []
    for i in range(num_states):
        trans_probs.append([0] * num_states)
    for i in range(num_states):
        trans_probs[i] = normalise(dict_trans[i], num_states)
    return trans_probs


def get_obs_probs(dict_obsInState, num_states, num_obs):
    # Build the row-normalised emission (observation) matrix.
    obs_probs = []
    for i in range(num_states):
        obs_probs.append([0] * num_obs)
    for i in range(num_states):
        obs_probs[i] = normalise(dict_obsInState[i], num_obs)
    return obs_probs


# get_data, get_seq, process_seq, get_initial_probs, normalise and decode are
# assumed to come from the project's other modules (not shown here).
l = get_data("train.txt")
list_seq = get_seq(l)
a = process_seq(list_seq)
first_seq = list_seq[0]

init_probs = get_initial_probs(a[-1], len(a[1]))
trans_probs = get_trans_probs(a[2], len(a[1]))
obs_probs = get_obs_probs(a[-2], len(a[1]), len(a[0]))

num_states = len(a[1])
num_observs = len(a[0])
obs_seq = [a[0][i[0]] for i in first_seq]
time_steps = len(obs_seq)

l = decode(num_observs, num_states, time_steps,
           init_probs, trans_probs, obs_probs, obs_seq)
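# normalise() is called above but never defined in this snippet. Judging from
# its usage, each dict_trans[i] / dict_obsInState[i] appears to map a column
# index to a raw count; a minimal stand-in under that assumption (the real
# helper may differ, e.g. it might apply smoothing) could look like this:
def normalise(counts, n):
    # Convert a {index: count} dict into a length-n row of probabilities.
    row = [0.0] * n
    total = float(sum(counts.values()))
    if total == 0.0:
        return row  # no counts observed for this row
    for idx, count in counts.items():
        row[idx] = count / total
    return row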