예제 #1
0
 def predict(self, x, init_prob=None, method='hmmlearn', window=-1):
     """Predict the hidden-state sequence for the observations ``x``.

     Args:
         x: Observation sequence. It is sliced along its first axis and
             ``x.shape[0]`` is used as the sequence length.
         init_prob: Initial state distribution. When ``None`` a uniform
             distribution over ``self.num_states`` states is used.
         method: ``'hmmlearn'`` decodes with a ``MultinomialHMM`` configured
             from ``self.A`` (transitions) and ``self.B`` (emissions); any
             other value decodes with ``self.decode(seq, init_prob)``.
         window: ``-1`` decodes the whole sequence in a single pass.
             Otherwise the first ``window`` samples are decoded together,
             and every later sample ``i`` receives the last state decoded
             from the ``window`` samples ending at ``i``.

     Returns:
         The decoder's output for the full sequence when ``window`` is
         ``-1``; otherwise an integer array holding one state per sample.
     """
     if init_prob is None:
         init_prob = np.ones(self.num_states) / self.num_states

     # Pick the decoder once so the windowing logic below is shared by
     # both back ends instead of being duplicated per branch.
     if method == 'hmmlearn':
         model = MultinomialHMM(self.num_states, n_iter=100)
         model.n_features = self.num_observations
         model.startprob_ = init_prob
         model.emissionprob_ = self.B
         model.transmat_ = self.A
         decode = model.predict
     else:
         def decode(seq):
             return self.decode(seq, init_prob)

     if window == -1:
         return decode(x)

     # ``np.int`` was removed in NumPy 1.24; it was only an alias of the
     # builtin ``int``, so the resulting dtype is unchanged.
     result = np.zeros(x.shape[0], dtype=int)
     result[0:window] = decode(x[0:window])
     for i in range(window, x.shape[0]):
         # Only the newest sample's state is kept from each window.
         result[i] = decode(x[i - window + 1:i + 1])[-1]
     return result
예제 #2
0
파일: hmm.py 프로젝트: SmartDataLab/EML
def get_hmm(df, n_components, n_features):
    """Build a MultinomialHMM whose parameters are derived from ``df``.

    The transition matrix comes from ``get_transmat`` applied to the state
    list extracted from ``df["label"]``; the emission matrix comes from
    ``get_emission`` applied to the predictions from ``df["pred"]`` and
    that same state list. No fitting is performed here.

    Args:
        df: Mapping-like object providing the ``"label"`` and ``"pred"``
            entries.
        n_components: Number of hidden states passed to ``MultinomialHMM``.
        n_features: Value stored on the model as ``n_features``.

    Returns:
        The configured ``MultinomialHMM`` instance.
    """
    _unused, states = get_ubie_label(df["label"])
    predictions = get_pred_for_hmm(df["pred"])

    model = MultinomialHMM(n_components=n_components)
    model.n_features = n_features
    model.transmat_ = get_transmat(states)
    model.emissionprob_ = get_emission(predictions, states)
    # NOTE(review): the start distribution is fixed at four entries, so
    # this presumably expects n_components == 4 -- confirm with callers.
    model.startprob_ = np.array([0.5, 0.05, 0.4, 0.05])
    return model
예제 #3
0
 def predict_prob(self, x, init_prob=None, window=-1):
     """Return ``predict_proba(x)`` from an HMM built on this object.

     A ``MultinomialHMM`` is configured with ``self.A`` as the transition
     matrix, ``self.B`` as the emission matrix and ``init_prob`` as the
     start distribution, which defaults to uniform over
     ``self.num_states`` states. ``window`` is accepted for signature
     compatibility but is not used by this method.
     """
     n_states = self.num_states
     start = init_prob
     if start is None:
         start = np.array([1 / n_states for _ in range(n_states)])

     hmm = MultinomialHMM(n_states)
     hmm.n_features = self.num_observations
     hmm.startprob_ = start
     hmm.emissionprob_ = self.B
     hmm.transmat_ = self.A
     return hmm.predict_proba(x)
예제 #4
0
                       0.0, 0.0, 0.2, 0.0, 0.3, 0.0, 0.0, 0.0, 0.0, 0.0, 0.2,
                       0.0, 0.3, 0.0, 0.0, 0.0
                   ],
                   [
                       0.0, 0.0, 0.3, 0.0, 0.7, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0,
                       0.0, 0.0, 0.0, 0.0, 0.0
                   ],
                   [
                       0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0,
                       0.0, 0.6, 0.4, 0.0, 0.0
                   ],
                   [
                       0.0, 0.0, 0.2, 0.0, 0.8, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0,
                       0.0, 0.0, 0.0, 0.0, 0.0
                   ]])
# Configure the Bol model with 16 observation symbols and the shared
# start/transition parameters.
hmmBol.n_features = 16
hmmBol.emissionprob_ = emmBol
hmmBol.startprob_ = startprob
hmmBol.transmat_ = transmat

# Position HMM
# Emission matrix: 8 rows, 5 columns (n_features = 5 below); every row
# sums to 1.
emmPos = np.array([
    [0.0, 0.0, 0.0, 0.0, 1.0],
    [0.0, 0.0, 0.0, 0.0, 1.0],
    [0.0, 0.0, 0.3, 0.7, 0.0],
    [0.0, 0.0, 0.0, 0.0, 1.0],
    [0.5, 0.5, 0.0, 0.0, 0.0],
    [0.5, 0.5, 0.0, 0.0, 0.0],
    [0.0, 0.0, 0.8, 0.2, 0.0],
    [0.0, 1.0, 0.0, 0.0, 0.0],
])
hmmPos.n_features = 5
hmmPos.emissionprob_ = emmPos
hmmPos.startprob_ = startprob
hmmPos.transmat_ = transmat

# Object HMM
예제 #5
0
 def computeHMM(dataset, alphabet, num_matchstates=9, num_restarts=10):
     """Fit a MultinomialHMM from several random initialisations.

     For each restart, random transition and emission rows are drawn with
     ``ProfileHMM.compute_random_row``, a ``MultinomialHMM`` is fitted on
     the encoded sequences, and the model with the highest ``score`` on
     that same data is kept.

     Args:
         dataset: Sequence of sequences whose items all appear in
             ``alphabet``.
         alphabet: Iterable of symbols; a symbol's position in
             ``list(alphabet)`` is its integer observation code.
         num_matchstates: Number of match states. The model has
             ``3 + 3 * num_matchstates`` states in total.
         num_restarts: Number of random initialisations to try
             (previously hard-coded to 10).

     Returns:
         The fitted model with the highest score, or ``None`` when
         ``num_restarts`` is 0.

     Raises:
         ValueError: Propagated from ``model.fit`` / ``model.score``. The
             original dropped into ``pdb`` here, which blocks
             non-interactive runs and then scored an unfitted model.
     """
     alphabet = list(alphabet)
     num_symbols = len(alphabet)
     residue_mapper = {residue: code for code, residue in enumerate(alphabet)}
     # one begin, one end, num_matchstates + 1 insert states,
     # num_matchstates match states, num_matchstates deletion states.
     num_states = 3 + 3 * num_matchstates
     # One column of integer codes for all sequences stacked together;
     # dataset_lengths tells fit/score where each sequence ends.
     concat_dataset = np.concatenate([[[residue_mapper[x]] for x in y]
                                      for y in dataset])
     dataset_lengths = [len(x) for x in dataset]

     best_score = None
     best_model = None
     for _restart in range(num_restarts):
         transition_matrix = np.zeros((num_states, num_states))
         emission_matrix = np.zeros((num_states, num_symbols))
         # Rows 0 .. num_matchstates + 1 are B, M_1 .. M_m and E; B and E
         # receive no emission row.
         # NOTE(review): the original comment says B goes to I_0 or M_1,
         # but columns 1 and 2 are written here, while the insert rows
         # below start at index num_matchstates + 2 -- confirm intent.
         b_row = ProfileHMM.compute_random_row(2)
         transition_matrix[0][1] = b_row[0]
         transition_matrix[0][2] = b_row[1]
         for i in range(1, num_matchstates + 1):
             # Each match state goes to the next match state, an insert
             # state, or a deletion state.
             m_row = ProfileHMM.compute_random_row(3)
             transition_matrix[i][i + 1] = m_row[0]
             transition_matrix[i][i + num_matchstates + 2] = m_row[1]
             transition_matrix[i][i + 2 * num_matchstates + 2] = m_row[2]
             emission_matrix[i] = ProfileHMM.compute_random_row(num_symbols)
         # Insert states: stay in place or move on to a match-row index.
         for i in range(num_matchstates + 2, 2 * num_matchstates + 3):
             row = ProfileHMM.compute_random_row(2)
             transition_matrix[i][i] = row[0]
             transition_matrix[i][i - (num_matchstates + 1)] = row[1]
             emission_matrix[i] = ProfileHMM.compute_random_row(num_symbols)
         # Deletion states, all but the last one.
         # NOTE(review): the last deletion row and the E row are left all
         # zero, as are the emission rows of the B, E and deletion
         # states -- confirm this is what the HMM library accepts.
         for i in range(2 * num_matchstates + 3, 3 * num_matchstates + 2):
             row = ProfileHMM.compute_random_row(2)
             transition_matrix[i][i] = row[0]
             transition_matrix[i][i - 2 * num_matchstates - 1] = row[1]

         model = MultinomialHMM(num_states, params="ets")
         model.n_features = num_symbols
         # Every sequence starts in the begin state.
         start_prob = np.zeros(num_states)
         start_prob[0] = 1.0
         model.startprob_ = start_prob
         model.transmat_ = transition_matrix
         model.emissionprob_ = emission_matrix

         # A ValueError raised here is left to propagate to the caller.
         model.fit(concat_dataset, dataset_lengths)
         score = model.score(concat_dataset, dataset_lengths)
         # The first restart always becomes the incumbent; later ones must
         # strictly beat it.
         if best_score is None or score > best_score:
             best_score = score
             best_model = model
     return best_model
def buildHMM(num_states, n_iter=10, tol=0.01):
    """Create an unfitted MultinomialHMM with ``n_features`` set to 3.

    Args:
        num_states: Passed to the model as ``n_components``.
        n_iter: Passed through to the model's ``n_iter`` argument.
        tol: Passed through to the model's ``tol`` argument.

    Returns:
        The new ``MultinomialHMM`` instance.
    """
    hmm = MultinomialHMM(
        n_components=num_states,
        n_iter=n_iter,
        tol=tol,
    )
    hmm.n_features = 3
    return hmm