def main():
    """Compare the PyTorch HMM log-marginal against hmmlearn's ``score``.

    Random start, transition and emission distributions are drawn and
    row-normalised, loaded into an hmmlearn ``MultinomialHMM``, and used to
    sample observation sequences.  The same parameters are then copied into
    the project ``HMM`` class and both implementations must agree on the
    log-likelihood of every sampled sequence to within ``1e-4``.

    Raises:
        AssertionError: if the two implementations disagree.
    """
    n_states, n_symbols = 5, 10
    n_seqs, seq_len = 20, 25

    hmm = MultinomialHMM(n_components=n_states)

    # Each row is normalised to sum to one.
    T = np.random.random(size=(n_states, n_states))
    T = T / T.sum(axis=1).reshape((n_states, 1))
    hmm.transmat_ = T

    pi = np.random.random(size=(n_states,))
    pi = pi / pi.sum()
    hmm.startprob_ = pi

    emit = np.random.random(size=(n_states, n_symbols))
    emit = emit / emit.sum(axis=1).reshape((n_states, 1))
    hmm.emissionprob_ = emit

    # ``np.int`` was removed from NumPy; the built-in ``int`` is the
    # documented replacement and yields the same dtype.
    X = np.zeros((n_seqs, seq_len), dtype=int)
    for i in range(n_seqs):
        x, _ = hmm.sample(n_samples=seq_len)
        X[i] = x.reshape((seq_len,))

    # load the PyTorch HMM
    # NOTE(review): the transposes suggest the project HMM stores its
    # matrices with the opposite axis order to hmmlearn -- confirm.
    phmm = HMM(z_dim=n_states, x_dim=n_symbols)
    phmm.T = torch.Tensor(T.T)
    phmm.pi = torch.Tensor(pi)
    phmm.emit = torch.Tensor(emit.T)

    # compute PyTorch HMM forward-backward
    my_marginals = phmm.log_marginal(torch.Tensor(X.T))

    # compute hmmlearn version, one sequence at a time as a column vector
    true_marginals = np.zeros(n_seqs)
    for i in range(n_seqs):
        true_marginals[i] = hmm.score(X[i].reshape((-1, 1)))

    assert np.abs(true_marginals - my_marginals.numpy()).max() < 1e-4
Exemple #2
0
def buildHMM(HMMFactory):
    """Return a two-state ``MultinomialHMM`` populated from ``HMMFactory``.

    The start, transition and emission probabilities are taken, in that
    order, from ``HMMFactory.hiddenProb()``, ``HMMFactory.transMatrix()``
    and ``HMMFactory.emissionMatrix()``.
    """
    hmm = MultinomialHMM(n_components=2, n_iter=200)

    start_probabilities = HMMFactory.hiddenProb()
    hmm.startprob_ = start_probabilities

    transition_probabilities = HMMFactory.transMatrix()
    hmm.transmat_ = transition_probabilities

    emission_probabilities = HMMFactory.emissionMatrix()
    hmm.emissionprob_ = emission_probabilities

    return hmm
Exemple #3
0
def create_hmm_data(N, seq_len, x_dim, z_dim, params=None):
    """Sample ``N`` discrete sequences of length ``seq_len`` from an HMM.

    Args:
        N: number of sequences to sample.
        seq_len: number of observations per sequence.
        x_dim: number of distinct observation symbols.
        z_dim: number of hidden states.
        params: optional ``(T, pi, emit)`` tuple of transition matrix,
            start distribution and emission matrix.  When ``None`` the
            three are drawn at random and normalised.

    Returns:
        ``((T, pi, emit), HMMData(X))`` where ``X`` is an integer array of
        shape ``(N, seq_len)`` holding the sampled observations.
    """
    from hmmlearn.hmm import MultinomialHMM  # introduces a lot of dependencies
    hmm = MultinomialHMM(n_components=z_dim)

    if params is None:
        # Random parameters; rows are normalised to sum to one.
        T = np.random.random(size=(z_dim, z_dim))
        T = T / T.sum(axis=1).reshape((z_dim, 1))

        pi = np.random.random(size=(z_dim,))
        pi = pi / pi.sum()

        emit = np.random.random(size=(z_dim, x_dim))
        emit = emit / emit.sum(axis=1).reshape((z_dim, 1))
    else:
        T, pi, emit = params

    hmm.transmat_ = T
    hmm.startprob_ = pi
    hmm.emissionprob_ = emit

    # ``np.int`` was removed from NumPy; the built-in ``int`` gives the
    # same dtype.
    X = np.zeros((N, seq_len), dtype=int)
    for i in range(N):
        x, _ = hmm.sample(n_samples=seq_len)
        X[i] = x.reshape((seq_len,))

    return (T, pi, emit), HMMData(X)
Exemple #4
0
 def predict(self, x, init_prob=None, method='hmmlearn', window=-1):
     """Predict the hidden-state sequence for observations ``x``.

     Args:
         x: observation array; only ``x.shape[0]`` and slicing along the
             first axis are used here.
         init_prob: initial state distribution.  Defaults to a uniform
             distribution over ``self.num_states`` states.
         method: ``'hmmlearn'`` decodes with an hmmlearn ``MultinomialHMM``
             built from ``self.A`` / ``self.B``; any other value decodes
             with ``self.decode``.
         window: ``-1`` decodes the whole sequence at once.  Otherwise the
             first ``window`` samples are decoded together and every later
             sample ``i`` takes the last state of decoding
             ``x[i - window + 1:i + 1]``.

     Returns:
         The decoder's output when ``window == -1``, otherwise an integer
         array of length ``x.shape[0]``.
     """
     if init_prob is None:
         init_prob = np.array(
             [1 / self.num_states for i in range(self.num_states)])

     if method == 'hmmlearn':
         model = MultinomialHMM(self.num_states, n_iter=100)
         model.n_features = self.num_observations
         model.startprob_ = init_prob
         model.emissionprob_ = self.B
         model.transmat_ = self.A
         decode_chunk = model.predict
     else:
         def decode_chunk(chunk):
             return self.decode(chunk, init_prob)

     if window == -1:
         return decode_chunk(x)

     # ``np.int`` was removed from NumPy; the built-in ``int`` is the
     # equivalent dtype.
     result = np.zeros(x.shape[0], dtype=int)
     result[0:window] = decode_chunk(x[0:window])
     for i in range(window, x.shape[0]):
         # Only the state of the newest sample in the window is kept.
         result[i] = decode_chunk(x[i - window + 1:i + 1])[-1]
     return result
Exemple #5
0
    def test_viterbi_case_random(self):
        """Compare ``HMM.viterbi`` with hmmlearn on 1000 random models."""
        for _trial in range(1000):
            # Draw random model dimensions and a sequence length.
            self.n_state = np.random.randint(1, 10)
            self.n_output = np.random.randint(1, 10)
            self.step = np.random.randint(1, 200)

            # Random parameters, each row normalised to sum to one.
            raw_start = np.random.random(self.n_state)
            startprob = raw_start / raw_start.sum()
            raw_trans = np.random.random((self.n_state, self.n_state))
            transmat = raw_trans / raw_trans.sum(axis=1).reshape(-1, 1)
            raw_emit = np.random.random((self.n_state, self.n_output))
            emissionprob = raw_emit / raw_emit.sum(axis=1).reshape(-1, 1)
            X = np.random.choice(self.n_output, self.step).reshape(-1, 1)

            # Reference decoding from hmmlearn.
            reference = MultinomialHMM(n_components=self.n_state)
            reference.startprob_ = startprob
            reference.transmat_ = transmat
            reference.emissionprob_ = emissionprob
            expected = reference.predict(X)

            # Decoding from the implementation under test.
            candidate = HMM()
            actual = candidate.viterbi(startprob, transmat, emissionprob, X)
            self.assertTrue(np.array_equal(expected, actual))
Exemple #6
0
    def initHMM(self, length):
        """Build the four-state ``MultinomialHMM`` (Pre, HQ, PostQuiet,
        PostActive) with three emission symbols.

        Args:
            length: scale for the transition rates; ``a = 1 / length`` is
                the probability of leaving the Pre state and of leaving
                the HQ state (split evenly between the two Post states).

        Returns:
            A ``MultinomialHMM`` with start, transition and emission
            probabilities set.
        """
        a = 1.0 / length

        # Transition probabilities
        trans = np.array([[1-a,   a,   0,   0],   # Pre ->
                          [  0, 1-a, a/2, a/2],   # HQ  ->
                          [  0,   0,   1,   0],   # PostQuiet ->
                          [  0,   0,   0,   1] ]) # PostActive ->

        # emission probabilities
        eps = 1e-4
        emit = np.array([[ 0.25, 0.25, 0.50 ],    # Emit | Pre
                         [ 0.16, 0.84-eps, eps ], # Emit | HQ
                         [ 0.90, 0.10-eps, eps ], # Emit | PostQuiet
                         [ 0.25, 0.25, 0.50 ] ])  # Emit | PostActive
        #                   A0    A1    A2

        # Start state distribution
        start = np.array([0.34, 0.33, 0.33, 0])

        # The state count is taken from the transition matrix itself so it
        # cannot drift from the literals above (the original referenced an
        # ``nStates`` name that is not defined in this method).
        hmm = MultinomialHMM(n_components=trans.shape[0])
        hmm.transmat_ = trans
        hmm.startprob_ = start
        hmm.emissionprob_ = emit
        return hmm
Exemple #7
0
 def get_model(self):
     """
     Initialise the HMM model.

     Returns a ``MultinomialHMM`` with one component per entry of
     ``self.states`` and its probabilities taken from ``self.init_p``,
     ``self.trans_p`` and ``self.emit_p``.
     """
     n_states = len(self.states)
     hmm = MultinomialHMM(n_components=n_states)
     hmm.startprob_ = self.init_p
     hmm.transmat_ = self.trans_p
     hmm.emissionprob_ = self.emit_p
     return hmm
Exemple #8
0
def get_hmm(df, n_components, n_features):
    """Build a ``MultinomialHMM`` from the ``label`` and ``pred`` columns.

    Transition probabilities come from ``get_transmat`` applied to the
    state list derived from ``df["label"]``; emission probabilities come
    from ``get_emission`` applied to the predictions and that state list.
    The start distribution is the fixed vector ``[0.5, 0.05, 0.4, 0.05]``.
    """
    label_info = get_ubie_label(df["label"])
    _, states = label_info
    preds = get_pred_for_hmm(df["pred"])

    hmm = MultinomialHMM(n_components=n_components)
    hmm.n_features = n_features
    hmm.transmat_ = get_transmat(states)
    hmm.emissionprob_ = get_emission(preds, states)
    hmm.startprob_ = np.array([0.5, 0.05, 0.4, 0.05])
    return hmm
Exemple #9
0
 def get_model(self):
     """Return a ``MultinomialHMM`` configured from this object.

     The model is created with ``params='e'`` and ``init_params=''`` and
     its start, transition and emission probabilities are taken from
     ``get_start()``, ``get_transition()`` and ``get_emission()``.
     """
     n_states = self.get_max()
     hmm = MultinomialHMM(n_components=n_states, params='e', init_params='')
     hmm.startprob_ = self.get_start()
     hmm.transmat_ = self.get_transition()
     hmm.emissionprob_ = self.get_emission()
     return hmm
Exemple #10
0
 def get_model(self):
     """
     Initialise the HMM model.

     Returns a ``MultinomialHMM`` with one component per entry of
     ``self.states`` and its probabilities taken from ``self.init_p``,
     ``self.trans_p`` and ``self.emit_p``.
     """
     hmm = MultinomialHMM(n_components=len(self.states))
     for attribute, value_name in (("startprob_", "init_p"),
                                   ("transmat_", "trans_p"),
                                   ("emissionprob_", "emit_p")):
         setattr(hmm, attribute, getattr(self, value_name))
     return hmm
def detect_events_hmm(mahal_timeseries, c_timeseries, global_pace_timeseries, threshold_quant=.95):
    """Detect events by decoding an outlier indicator series with an HMM.

    Every date is turned into a binary symbol: 1 when its Mahalanobis value
    exceeds the ``threshold_quant`` quantile or its ``c_timeseries`` entry
    equals 1, otherwise 0.  A two-state ``MultinomialHMM`` with fixed
    transition and emission matrices decodes that series, and the decoded
    states are passed to ``get_all_events``.

    Returns:
        ``(events, predictions)`` where ``events`` is sorted in descending
        order of element 2 of each event (its duration, per the original
        comments) and ``predictions`` is the decoded state sequence.
    """
    # Chronological order of the keys.
    dates = sorted(mahal_timeseries)

    expected_pace_timeseries, _sd_pace_timeseries = getExpectedPace(global_pace_timeseries)

    # Values of each series in chronological order.
    mahal_values = [mahal_timeseries[day] for day in dates]
    c_values = [c_timeseries[day] for day in dates]
    pace_values = [global_pace_timeseries[day] for day in dates]
    expected_values = [expected_pace_timeseries[day] for day in dates]

    # Quantile-based outlier threshold.
    threshold = getQuantile(sorted(mahal_values), threshold_quant)

    # 1 marks an outlier, 0 a regular observation.
    symbols = [1 if (m > threshold or c == 1) else 0
               for m, c in zip(mahal_values, c_values)]

    # State 0 models "no event", state 1 models "event".  The heavy diagonal
    # keeps the decoded sequence from switching state rapidly.
    trans_matrix = array([[.999, .001],
                          [.001, .999]])

    # State 0 mostly emits symbol 0; state 1 mostly emits symbol 1.
    emission_matrix = array([[.95, .05],
                             [.4, .6]])

    # NOTE(review): ``transmat=`` as a constructor keyword looks like the
    # legacy HMM API -- confirm against the installed library version.
    model = MultinomialHMM(n_components=2, transmat=trans_matrix)
    model.emissionprob_ = emission_matrix

    _lnl, predictions = model.decode(symbols)

    events = get_all_events(predictions, dates, mahal_values, pace_values,
                            expected_values)

    # Longest events first.
    events.sort(key=lambda event: event[2], reverse=True)
    return events, predictions
Exemple #12
0
 def predict_prob(self, x, init_prob=None, window=-1):
     """Return ``predict_proba(x)`` from an hmmlearn model built from
     ``self.A`` (transitions) and ``self.B`` (emissions).

     ``init_prob`` defaults to a uniform distribution over
     ``self.num_states`` states.  ``window`` is accepted but not used.
     """
     if init_prob is None:
         n_states = self.num_states
         init_prob = np.array([1 / self.num_states for _ in range(n_states)])

     hmm = MultinomialHMM(self.num_states)
     hmm.n_features = self.num_observations
     hmm.startprob_ = init_prob
     hmm.emissionprob_ = self.B
     hmm.transmat_ = self.A

     posteriors = hmm.predict_proba(x)
     return posteriors
def run_hmm_model(input_df, n_unique, A_df, Eta, n_iter=10000,
                  tol=1e-2, verbose=False, params='e', init_params=''):
    '''
        Fit the HMM on the ``cluster`` column and decode it.

            input_df : The dataframe of keypresses; its ``cluster`` column
                       is the observation sequence

            n_unique : Number of unique characters (hidden states)

            A_df : Dataframe holding the transition matrix

            Eta : Emission matrix

            n_iter : Maximum number of iterations for the HMM

            tol : Stop fitting once the score improves by less than this

            verbose : Whether or not to print progress

            params : Parameters to tune during fitting

            init_params : Parameters to initialize before fitting

        Returns ``(score, results, hmm)``: the two values returned by
        ``decode`` followed by the fitted model.
    '''
    # Proportion of characters starting words in English
    char_counts = get_char_counts()

    # Build the model; the start prior is the character counts with a
    # leading zero prepended.
    model = MultinomialHMM(
        n_components=n_unique,
        startprob_prior=np.append(0, char_counts.values),
        transmat_prior=A_df.values,
        algorithm='viterbi',
        random_state=None,
        n_iter=n_iter,
        tol=tol,
        verbose=verbose,
        params=params,
        init_params=init_params,
    )

    # Explicit parameter values
    model.emissionprob_ = Eta
    model.transmat_ = A_df.values
    model.startprob_ = np.append(0, char_counts.values)

    # The clusters are the observed sequence; a 1-D array is turned into a
    # single column.
    observations = input_df['cluster'].values
    if len(observations.shape) == 1:
        n_rows = len(observations)
        observations = observations.reshape((n_rows, 1))

    # Fit, then decode the same observations
    model = model.fit(observations)
    score, results = model.decode(observations)

    return score, results, model
Exemple #14
0
def get_hmm_model(state):
    """Create a ``MultinomialHMM`` from stored probability tables.

    Input:
    - state: dictionary
        keyed by the ``name`` of each ``HiddenMarkovModelProbability``
        member (``emission``, ``start``, ``transition``); the values are
        the matrices or arrays describing the hidden Markov model
    Returns: a ``MultinomialHMM`` with one component per ``SleepStage``
        member and its probabilities set from ``state``
    """
    n_stages = len(SleepStage)
    model = MultinomialHMM(n_components=n_stages)

    emission_key = HiddenMarkovModelProbability.emission.name
    model.emissionprob_ = state[emission_key]

    start_key = HiddenMarkovModelProbability.start.name
    model.startprob_ = state[start_key]

    transition_key = HiddenMarkovModelProbability.transition.name
    model.transmat_ = state[transition_key]

    return model
Exemple #15
0
def detect_events_hmm(mahal_timeseries,
                      c_timeseries,
                      global_pace_timeseries,
                      threshold_quant=.95,
                      trans_matrix=DEFAULT_TRANS_MATRIX,
                      emission_matrix=DEFAULT_EMISSION_MATRIX,
                      initial_state=None):
    """Detect events by decoding an outlier indicator series with an HMM.

    Every date is turned into a binary symbol: 1 when its Mahalanobis value
    exceeds the ``threshold_quant`` quantile or its ``c_timeseries`` entry
    equals 1, otherwise 0.  A two-state ``MultinomialHMM`` built from
    ``trans_matrix``, ``emission_matrix`` and ``initial_state`` decodes the
    series, and the decoded states are passed to ``get_all_events``.

    Returns:
        ``(events, predictions)`` where ``events`` is sorted in descending
        order of element 2 of each event (its duration, per the original
        comments) and ``predictions`` is the decoded state sequence.
    """
    # Chronological order of the keys.
    dates = sorted(mahal_timeseries)

    expected_pace_timeseries, _sd_pace_timeseries = getExpectedPace(
        global_pace_timeseries)

    # Values of each series in chronological order.
    mahal_values = [mahal_timeseries[day] for day in dates]
    c_values = [c_timeseries[day] for day in dates]
    pace_values = [global_pace_timeseries[day] for day in dates]
    expected_values = [expected_pace_timeseries[day] for day in dates]

    # Quantile-based outlier threshold.
    threshold = getQuantile(sorted(mahal_values), threshold_quant)

    # 1 marks an outlier, 0 a regular observation.
    symbols = [
        1 if (m > threshold or c == 1) else 0
        for m, c in zip(mahal_values, c_values)
    ]

    # NOTE(review): ``transmat=`` / ``startprob=`` as constructor keywords
    # look like the legacy HMM API -- confirm against the installed version.
    model = MultinomialHMM(n_components=2,
                           transmat=trans_matrix,
                           startprob=initial_state)
    model.emissionprob_ = emission_matrix

    _lnl, predictions = model.decode(symbols)

    events = get_all_events(predictions, dates, mahal_values, pace_values,
                            expected_values)

    # Longest events first.
    events.sort(key=lambda event: event[2], reverse=True)
    return events, predictions
def detect_events_hmm(mahal_timeseries, c_timeseries, global_pace_timeseries,
                      threshold_quant=.95, trans_matrix = DEFAULT_TRANS_MATRIX,
                      emission_matrix=DEFAULT_EMISSION_MATRIX, initial_state=None):
    """Detect events by decoding an outlier indicator series with an HMM.

    A date's symbol is 1 when its Mahalanobis value exceeds the
    ``threshold_quant`` quantile or its ``c_timeseries`` entry equals 1, and
    0 otherwise.  The symbols are decoded by a two-state ``MultinomialHMM``
    configured with ``trans_matrix``, ``emission_matrix`` and
    ``initial_state``; the decoded states are handed to ``get_all_events``.

    Returns:
        ``(events, predictions)``; ``events`` is sorted in descending order
        of element 2 of each event (its duration, per the original
        comments).
    """
    # Keys of the time series in chronological order
    ordered_dates = sorted(mahal_timeseries)

    pace_stats = getExpectedPace(global_pace_timeseries)
    (expected_pace_timeseries, _sd_pace_timeseries) = pace_stats

    # Per-date values, aligned with ordered_dates
    mahal_series = []
    c_series = []
    pace_series = []
    expected_series = []
    for day in ordered_dates:
        mahal_series.append(mahal_timeseries[day])
    for day in ordered_dates:
        c_series.append(c_timeseries[day])
    for day in ordered_dates:
        pace_series.append(global_pace_timeseries[day])
    for day in ordered_dates:
        expected_series.append(expected_pace_timeseries[day])

    # Outlier threshold from the requested quantile
    threshold = getQuantile(sorted(mahal_series), threshold_quant)

    # "1" marks an outlier, "0" marks a regular observation
    symbols = []
    for mahal_value, c_value in zip(mahal_series, c_series):
        is_outlier = mahal_value > threshold or c_value == 1
        symbols.append(1 if is_outlier else 0)

    # NOTE(review): ``transmat=`` / ``startprob=`` as constructor keywords
    # look like the legacy HMM API -- confirm against the installed version.
    model = MultinomialHMM(n_components=2, transmat=trans_matrix, startprob=initial_state)
    model.emissionprob_ = emission_matrix

    _lnl, predictions = model.decode(symbols)

    events = get_all_events(predictions, ordered_dates, mahal_series, pace_series,
                            expected_series)

    # Longest events first
    events.sort(key=lambda event: event[2], reverse=True)
    return events, predictions
Exemple #17
0
    def test_viterbi_case_handcraft(self):
        """Compare ``HMM.viterbi`` with hmmlearn on one hand-written model."""
        # Two hidden states, three output symbols, nine observations.
        startprob = np.array([0.6, 0.4])
        transmat = np.array([[0.7, 0.3], [0.4, 0.6]])
        emissionprob = np.array([[0.1, 0.4, 0.5], [0.6, 0.3, 0.1]])
        observed = [1, 0, 2, 0, 2, 1, 0, 1, 1]
        X = np.array(observed).reshape(-1, 1)

        # Reference decoding from hmmlearn.
        reference = MultinomialHMM(n_components=2)
        reference.startprob_ = startprob
        reference.transmat_ = transmat
        reference.emissionprob_ = emissionprob
        expected = reference.predict(X)

        # Decoding from the implementation under test.
        candidate = HMM()
        actual = candidate.viterbi(startprob, transmat, emissionprob, X)
        self.assertTrue(np.array_equal(expected, actual))
def test_DiscreteHMM_fit(cases: str) -> None:
    """Check ``DiscreteHMM.fit`` against hmmlearn on random problems.

    ``cases`` is converted to an int; ``cases - 1`` random problems are
    generated (the counter starts at 1).  For each, both models start from
    the same parameters and must agree on the fitted start, transition and
    emission probabilities to four decimals.
    """
    np.random.seed(12346)
    n_cases = int(cases)
    N_decimal = 4
    max_iter = 100
    tol = 1e-3

    for _case in range(1, n_cases):
        n_samples = np.random.randint(10, 50)
        hidden_states = np.random.randint(3, 6)
        # symbols is the number of unique observation types.
        symbols = np.random.randint(4, 9)

        # Every sequence is a random ordering of all the symbols, so each
        # has length ``symbols``.
        X = [
            np.random.choice(range(symbols), size=symbols, replace=False)
            for _ in range(n_samples)
        ]
        lengths = [symbols for _ in range(n_samples)]

        # Uniform transitions and start distribution; Dirichlet emissions.
        A = np.full((hidden_states, hidden_states), 1 / hidden_states)
        B = np.array([
            np.random.dirichlet(np.ones(symbols), size=1)[0]
            for _ in range(hidden_states)
        ])
        pi = np.ones(hidden_states) / hidden_states

        # Reference fit with hmmlearn, starting from the same parameters.
        hmm_gold = MultinomialHMM(n_components=hidden_states,
                                  startprob_prior=1,
                                  transmat_prior=1,
                                  init_params='',
                                  n_iter=max_iter,
                                  tol=tol)
        hmm_gold.transmat_ = A
        hmm_gold.emissionprob_ = B
        hmm_gold.startprob_ = pi
        hmm_gold.fit(np.concatenate(X).reshape((-1, 1)), lengths)

        # Fit with the implementation under test.
        hmm_mine = DiscreteHMM(hidden_states=hidden_states,
                               symbols=symbols,
                               A=A,
                               B=B,
                               pi=pi,
                               tol=tol,
                               max_iter=max_iter)
        hmm_mine.fit(X)

        assert_almost_equal(hmm_mine.pi, hmm_gold.startprob_, decimal=N_decimal)
        assert_almost_equal(hmm_mine.A, hmm_gold.transmat_, decimal=N_decimal)
        assert_almost_equal(hmm_mine.B, hmm_gold.emissionprob_, decimal=N_decimal)

    print('Successfully testing the function of estimating parameters in discrete HMM!')
Exemple #19
0
# Report on the trained model (``model`` is defined earlier in the script).
print("Training Done")
print("Model = ",model.monitor_)
print("The transition prob of this trained model : ")
print(model.transmat_)
# Transposed so that each row is a symbol and each column a state.
emiso = np.transpose(model.emissionprob_)
print("\nThe emmision prob of this trained model : ")
print("  State-0    State-1")
print(emiso)
seven_most_probabe(emiso)  # print the 7 most likely characters
print("Stationary probbabilities : ",model. get_stationary_distribution())
print("So seeing the emission probabilities we can say that State 1 is Consonant and State 0 is Vowel")

# Task 4: score a hand-specified ("natural") model on the same data and
# compare its log-probability with the trained model's.
print("\nTask - 4")
model_nat = MultinomialHMM(n_components=2)
model_nat.transmat_ = trans_prob
# ``emmis_prob`` is transposed before use.
# NOTE(review): presumably it is stored as (symbols, states) -- confirm.
model_nat.emissionprob_ = np.transpose(emmis_prob)
model_nat.startprob_ = np.array([0, 1])  # always start in state 1
scr2 = (model_nat.score(Data_arr))
scr1 = (model.score(Data_arr))
print("Log_Prob of Trained one is = " , scr1)
print("Log_Prob of Natural one is = " , scr2)
if(scr1 > scr2):
    print("Trained Model is better")
else:
    print("Natural Model is better")
# A second model starts from the natural parameters, with up to 200
# training iterations configured.
print("Intializing the params of model from natural model")
model2 = MultinomialHMM(n_components=2,n_iter = 200)
model2.transmat_ = trans_prob
model2.emissionprob_ = np.transpose(emmis_prob)
model2.startprob_ = np.array([0, 1])
print("Training started")
Exemple #20
0
# Transition probabilities as specified above (4 x 4, one row per state)
transition_matrix = np.array([[0.2, 0.6, 0.15, 0.05], [0.2, 0.3, 0.3, 0.2],
                              [0.05, 0.05, 0.7, 0.2],
                              [0.005, 0.045, 0.15, 0.8]])

# Set the transition probabilities on the model
model_multinomial.transmat_ = transition_matrix

# Initial state probabilities
initial_state_prob = np.array([0.1, 0.4, 0.4, 0.1])

# Set the initial state probabilities on the model
model_multinomial.startprob_ = initial_state_prob

# The emission probabilities are required to have shape
# (n_components, n_symbols), so instead of feeding the CPD directly
# we use its transpose.
emission_prob = np.array([[0.045, 0.15, 0.2, 0.6, 0.005],
                          [0.2, 0.2, 0.2, 0.3, 0.1],
                          [0.3, 0.1, 0.1, 0.05, 0.45],
                          [0.1, 0.1, 0.2, 0.05, 0.55]])

# Set the emission probabilities on the model
model_multinomial.emissionprob_ = emission_prob

# model.sample returns both the observations and the hidden states:
# the first return value is the observations and the second is the
# hidden states, so here Z holds observations and X holds states.
Z, X = model_multinomial.sample(100)
Exemple #21
0
    emission_matrix = []
    for key in emission_dict.keys():
        tmp = emission_dict[key]
        emission_matrix.append(tmp)

    emission_matrix = np.array(emission_matrix)

    #Adding one row for unknowns
    unk = np.zeros((1, 9))
    emission_matrix = np.vstack((emission_matrix, unk))
    emission_matrix = emission_matrix.T

    model = MultinomialHMM(n_components=n_states, algorithm='viterbi')
    model.startprob_ = np.array(start_prob)
    model.transmat_ = trans_mat
    model.emissionprob_ = emission_matrix

    # format is: word gold pred
    nexcept = 0
    with open("results.txt", "w") as out:
        for sent in test_sents:
            inp = []
            for i in range(len(sent)):
                word = sent[i][0]
                try:
                    k = list(emission_dict.keys()).index(word)
                except:
                    nexcept += 1
                    k = emission_matrix.shape[0] - 1

                inp.append(k)
Exemple #22
0
import numpy as np
import math
from hmmlearn.hmm import MultinomialHMM

# Two-state HMM (venue: Home / Away) emitting a match result
# (Win / Lose / Draw).
model_man_derby = MultinomialHMM(n_components=2)
states = ["Home", "Away"]
observations = ["Win", "Lose", "Draw"]

initial_vector = np.array([0.5, 0.5])
model_man_derby.startprob_ = initial_vector
transition_matrix = np.array([[0.2, 0.8], [0.8, 0.2]])
model_man_derby.transmat_ = transition_matrix
emission_matrix = np.array([[0.4, 0.467, 0.133], [0.4, 0.4, 0.2]])
model_man_derby.emissionprob_ = emission_matrix

# Every two-match outcome, one per column after the transpose
# (0 = Win, 1 = Lose, 2 = Draw).
result = np.array([[0, 0], [0, 1], [0, 2], [1, 0], [1, 1], [1, 2], [2, 0],
                   [2, 1], [2, 2]]).T
titles = ["WW", "WL", "WD", "LW", "LL", "LD", "DW", "DL", "DD"]

for i, title in enumerate(titles):
    # A sequence is passed as a column of shape (n_samples, 1): one row per
    # time step.  The original reshaped to (1, n_samples), i.e. a single
    # sample with several features.
    # NOTE(review): recent hmmlearn releases moved this categorical model
    # to ``CategoricalHMM`` -- confirm against the installed version.
    logprob = model_man_derby.score(result[:, i].reshape(-1, 1))
    print(title, ':', math.exp(logprob))
Exemple #23
0
# coding: utf-8

import numpy as np
from hmmlearn.hmm import MultinomialHMM

from hmm import DiscreteHMM

# Script entry point: sample data from a known hmmlearn model and pass it
# to the project's DiscreteHMM for training.
if __name__ == "__main__":
    # Ground-truth parameters: 3 hidden states, 2 observation symbols.
    start_probability = np.array([0.2, 0.4, 0.4])
    transition_probability = np.array([[0.5, 0.2, 0.3], [0.3, 0.5, 0.2],
                                       [0.2, 0.3, 0.5]])
    emission_probability = np.array([[0.5, 0.5], [0.4, 0.6], [0.7, 0.3]])

    disc_hmm = MultinomialHMM(n_components=3)
    disc_hmm.startprob_ = start_probability
    disc_hmm.transmat_ = transition_probability
    disc_hmm.emissionprob_ = emission_probability

    # X: sampled observations, Z: the hidden states that produced them.
    X, Z = disc_hmm.sample(100)

    # NOTE(review): train() receives both observations and states, so it
    # presumably performs supervised estimation -- confirm in hmm.py.
    my_model = DiscreteHMM(n_obs=2, n_state=3)
    my_model.train(X, Z)
    print(X)
Exemple #24
0
                   [
                       0.0, 0.0, 0.3, 0.0, 0.7, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0,
                       0.0, 0.0, 0.0, 0.0, 0.0
                   ],
                   [
                       0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0,
                       0.0, 0.6, 0.4, 0.0, 0.0
                   ],
                   [
                       0.0, 0.0, 0.2, 0.0, 0.8, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0,
                       0.0, 0.0, 0.0, 0.0, 0.0
                   ]])
# Configure hmmBol: 16 observation symbols; the start and transition
# probabilities are the shared ``startprob`` / ``transmat`` objects.
hmmBol.n_features = 16
hmmBol.startprob_ = startprob
hmmBol.transmat_ = transmat
hmmBol.emissionprob_ = emmBol

# Position HMM: 8 states (rows) by 5 observation symbols (columns);
# every row sums to one.
emmPos = np.array([[0.0, 0.0, 0.0, 0.0, 1.0], [0.0, 0.0, 0.0, 0.0, 1.0],
                   [0.0, 0.0, 0.3, 0.7, 0.0], [0.0, 0.0, 0.0, 0.0, 1.0],
                   [0.5, 0.5, 0.0, 0.0, 0.0], [0.5, 0.5, 0.0, 0.0, 0.0],
                   [0.0, 0.0, 0.8, 0.2, 0.0], [0.0, 1.0, 0.0, 0.0, 0.0]])
hmmPos.n_features = 5
hmmPos.startprob_ = startprob
hmmPos.transmat_ = transmat
hmmPos.emissionprob_ = emmPos

# Object HMM
emmObj = np.array([[0.1, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.9, 0.0],
                   [0.3, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.7],
                   [0.0, 0.0, 0.0, 0.3, 0.2, 0.2, 0.3, 0.0, 0.0],
        'T': 0
    },
    'I': {
        'A': 0.4,
        'C': 0.1,
        'G': 0.1,
        'T': 0.4
    }
}

# Three-state HMM over the nucleotide alphabet (A, C, G, T); the states are
# labelled E = 0, 5 = 1, I = 2 in the comment near the end of this block.
model = MultinomialHMM(n_components=3)
model.startprob_ = np.array([1, 0, 0])  # always start in state 0 (E)
# NOTE(review): ``endprob_`` does not appear to be an attribute hmmlearn
# reads, so this assignment probably has no effect on decoding -- confirm.
model.endprob_ = np.array([0, 0, 0.1])

# State 0 stays with 0.9 or moves to state 1; state 1 always moves to
# state 2; state 2 is absorbing.
model.transmat_ = np.array([[0.9, 0.1, 0], [0, 0, 1], [0, 0, 1]])
# One row per state, one column per symbol (A, C, G, T).
model.emissionprob_ = np.array([[0.25, 0.25, 0.25, 0.25], [0.05, 0, 0.95, 0],
                                [0.4, 0.1, 0.1, 0.4]])

# In[121]:

# "CTTCATGTGAAAGCAGACGTAAGTCA" encoded with A = 0, C = 1, G = 2, T = 3
sequence = [
    1, 3, 3, 1, 0, 3, 2, 3, 2, 0, 0, 0, 2, 1, 0, 2, 0, 1, 2, 3, 0, 0, 2, 3, 1,
    0
]

# decode() is given the sequence as a column vector, one row per symbol.
logprob, seq = model.decode(np.array([sequence]).transpose())
print(logprob)
print(seq)
# State labels: E = 0, 5 = 1, I = 2
print("following sequence correspond to :")
print("EEEEEEEEEEEEEEEEEE5IIIIIII")
Exemple #26
0
    def train(self, data, labels, tp=None):
        """Fit one ``MultinomialHMM`` per class and store it in ``self.models``.

        For every class index ``i`` in ``range(self.nb_class)`` the samples
        whose label equals ``i`` are encoded with ``fit_encode_class`` /
        ``transform_encode_class`` and a model is fitted on the result.

        Args:
            data: sequence of samples, indexable by a NumPy index array
                once converted with ``np.array``.
            labels: class label of each sample.
            tp: when falsy, start and transition probabilities are uniform
                and only the emissions are initialised by the library.
                When truthy, emission, start and transition values are
                hand-built from the encoded observations before fitting.
        """
        labels = np.array(labels)
        for i in range(self.nb_class):
            # Formatted so the output is "Class <i>" on Python 2 and 3.
            print("Class %s" % i)
            ind = np.where(labels == i)
            digit_data = np.array(data)[ind]

            self.fit_encode_class(digit_data, i)

            sks, lengths = self.transform_encode_class(digit_data, i)

            if not tp:
                model = MultinomialHMM(n_components=self.nb_components,
                                       n_iter=self.max_iter,
                                       tol=self.tol,
                                       verbose=True,
                                       params='ste',
                                       init_params='e')
                init = 1. / self.nb_components
                model.startprob_ = np.full(self.nb_components, init)
                model.transmat_ = np.full(
                    (self.nb_components, self.nb_components), init)

            else:
                # NOTE(review): no ``init_params`` is passed here, so the
                # library default applies; if that default re-initialises
                # start/transition/emission values inside fit(), the
                # hand-built values below are overwritten -- confirm.
                model = MultinomialHMM(n_components=self.nb_components,
                                       n_iter=self.max_iter,
                                       tol=self.tol,
                                       verbose=True,
                                       params='ste')

                # Number of distinct centroids
                num_obs = len(np.unique(np.concatenate(sks)))
                model.emissionprob_ = np.zeros((self.nb_components, num_obs))

                # Split the observation ids into contiguous buckets, one per
                # state; hist maps a state to the ids it owns.  Floor
                # division keeps the bucket size an int on Python 3 too.
                hist = {}
                curr = 0
                bucket_len = num_obs // self.nb_components
                last_state = self.nb_components - 1
                for j in range(self.nb_components):
                    end = curr + bucket_len
                    if j == last_state and end < num_obs:
                        # The last state also takes the remainder.
                        end = num_obs
                    for k in range(curr, end):
                        hist.setdefault(j, []).append(k)
                        model.emissionprob_[j, k] = 1
                    curr = end

                model.startprob_ = np.zeros(self.nb_components)
                # always ends by penup
                model.startprob_[-1] = 1

                model.transmat_ = np.zeros(
                    (self.nb_components, self.nb_components))

                # Count state-to-state transitions between consecutive
                # observations of every example.
                state_occ_count = np.zeros(self.nb_components)
                le = self.les[i]
                for example in digit_data:
                    j = 0
                    prevobs = 0
                    for obs in example:
                        val = le.transform(obs)
                        if j == 0:
                            # First observation: nothing to transition from.
                            prevobs = val
                            j += 1
                            continue
                        prevobs_state = None
                        obs_state = None
                        for k in range(self.nb_components):
                            if (prevobs_state is not None
                                    and obs_state is not None):
                                break
                            if prevobs in hist[k]:
                                prevobs_state = k
                            if val in hist[k]:
                                obs_state = k
                        state_occ_count[prevobs_state] += 1
                        model.transmat_[prevobs_state, obs_state] += 1
                        prevobs = val
                        j += 1

                # Turn the counts into per-row frequencies.
                for j in range(self.nb_components):
                    for k in range(self.nb_components):
                        model.transmat_[j, k] = (model.transmat_[j, k]
                                                 / state_occ_count[j])

            model.fit(sks, lengths)
            self.models[i] = model
Exemple #27
0
# generate emission matrix: one frequency distribution per entry of ``es``
E = []
for e in es:
    dist = HMM_graph.frequency_distribution(seq2int(e))
    E.append(dist)

# generate transition matrix: count the (from, to) pairs in ``ts`` (labels
# are 1-based), then normalise every row with HMM_graph.norm
A = np.zeros((max(labels), max(labels)))
for t in ts:
    A[t[0] - 1, t[1] - 1] += 1
A = [HMM_graph.norm(row) for row in A]

hmm = MultinomialHMM(max(labels))
hmm.startprob_ = np.array(I)
hmm.emissionprob_ = np.array(E)
hmm.transmat_ = A

# Try unsupervised stuff
# The files are opened in ``with`` blocks so the handles are closed, and
# ``str(s.seq)`` replaces ``Seq.tostring()``, which newer Biopython
# releases no longer provide.
with open('fasta/type1.fasta') as fasta_handle:
    type1 = [str(s.seq) for s in SeqIO.parse(fasta_handle, 'fasta')]
with open('fasta/type2.fasta') as fasta_handle:
    type2 = [str(s.seq) for s in SeqIO.parse(fasta_handle, 'fasta')]

# First half of each file; floor division keeps the slice bounds integral
# on Python 3 as well as Python 2.
training_seqs = type1[:len(type1) // 2] + type2[:len(type2) // 2]
print(training_seqs)
# list(...) keeps this a list on Python 3, where map() is lazy.
training_seqs = list(map(hmmseq, training_seqs))

tA, tE = fit(hmm, training_seqs)
print(
    "By observing the most likely charcters it seems like I should have used vowels and consonants as two sepaerate states"
)

# In[109]:

# Task 4: score the trained model, then score a hand-specified ("natural")
# model on the same training data.
evaluate_hmm_model = hmm_model.score(training_data)

print("The score of the inbuilt trained model is")
print(evaluate_hmm_model)
print("\n")

hmm_natural_model = MultinomialHMM(n_components=2)
hmm_natural_model.transmat_ = transition_prob
# ``emission_prob`` is transposed before use.
# NOTE(review): presumably it is stored as (symbols, states) -- confirm.
hmm_natural_model.emissionprob_ = np.transpose(emission_prob)
hmm_natural_model.startprob_ = np.array([0, 1])  # always start in state 1
evaluate_hmm_natural = hmm_natural_model.score(training_data)
print("The score of my designed natural hmm is")
print(evaluate_hmm_natural)
# NOTE(review): this model is never fitted before ``monitor_`` is read, so
# the monitor presumably holds no training history -- confirm.
print(hmm_natural_model.monitor_)
print("\n")
print(
    "Since, the score of the inbulit hmm model is more than the natural model")
print("Therefore the performance of inbuilt hmm model is good\n")

# A second model starts from the natural parameters, with up to 500
# training iterations configured.
print("Training the natural hmm")
hmm_natural_model1 = MultinomialHMM(n_components=2, n_iter=500)
hmm_natural_model1.transmat_ = transition_prob
hmm_natural_model1.emissionprob_ = np.transpose(emission_prob)
hmm_natural_model1.startprob_ = np.array([0, 1])
Exemple #29
0
 def computeHMM(dataset, alphabet, num_matchstates=9):
     """Fit a profile-shaped MultinomialHMM with 10 random restarts.

     Each restart builds random transition/emission matrices laid out as
     follows (m = ``num_matchstates``, 3 + 3*m states in total), fits the
     model on ``dataset`` and scores it on the same data:

     * row 0: begin state, with transitions to columns 1 and 2;
     * rows 1..m: match states, each with transitions to ``i + 1``,
       ``i + m + 2`` and ``i + 2*m + 2`` plus a random emission row;
     * row m + 1: left all-zero (end state);
     * rows m+2..2m+2: insert states, each with a self-loop and a transition
       to ``i - (m + 1)`` plus a random emission row;
     * rows 2m+3..3m+1: deletion states, each with a self-loop and a
       transition to ``i - 2*m - 1``; row 3m+2 is left all-zero.

     Args:
         dataset: iterable of sequences whose items are members of
             ``alphabet``.
         alphabet: iterable of distinct residues; its order defines the
             integer encoding of the observations.
         num_matchstates: number of match states in the profile.

     Returns:
         The fitted model with the highest score, or ``None`` if every
         restart failed to fit.
     """
     alphabet = list(alphabet)
     num_symbols = len(alphabet)
     residue_mapper = {residue: idx for idx, residue in enumerate(alphabet)}
     #one begin, one end, num_matchstates + 1 insert states, num_matchstates match states, num_matchstates deletion states.
     num_states = 3 + 3 * num_matchstates
     # hmmlearn expects one column vector of symbol ids for all sequences,
     # plus the individual sequence lengths.
     concat_dataset = np.concatenate([[[residue_mapper[x]] for x in y]
                                      for y in dataset])
     dataset_lengths = [len(x) for x in dataset]

     best_score = None
     best_model = None
     for restart in range(10):
         transition_matrix = np.zeros((num_states, num_states))
         emission_matrix = np.zeros((num_states, num_symbols))

         # Begin state.
         # NOTE(review): the original comment says "B goes to either I_0 or
         # M_1", but columns 1 and 2 are the first two match states in this
         # layout -- confirm which was intended.
         b_row = ProfileHMM.compute_random_row(2)
         transition_matrix[0][1] = b_row[0]
         transition_matrix[0][2] = b_row[1]

         # Match states: go to the next match, an insert or a deletion state.
         for i in range(1, num_matchstates + 1):
             m_row = ProfileHMM.compute_random_row(3)
             #next match state
             transition_matrix[i][i + 1] = m_row[0]
             #insert state
             transition_matrix[i][i + num_matchstates + 2] = m_row[1]
             #deletion state
             print('i: %d' % i)
             transition_matrix[i][i + 2 * num_matchstates + 2] = m_row[2]
             emission_matrix[i] = ProfileHMM.compute_random_row(num_symbols)

         # Insert states: either loop on self or move to the next match state.
         for i in range(num_matchstates + 2, 2 * num_matchstates + 3):
             row = ProfileHMM.compute_random_row(2)
             transition_matrix[i][i] = row[0]
             transition_matrix[i][i - (num_matchstates + 1)] = row[1]
             emission_matrix[i] = ProfileHMM.compute_random_row(num_symbols)

         # Deletion states: all but the last one get outgoing transitions.
         for i in range(2 * num_matchstates + 3, 3 * num_matchstates + 2):
             row = ProfileHMM.compute_random_row(2)
             transition_matrix[i][i] = row[0]
             transition_matrix[i][i - 2 * num_matchstates - 1] = row[1]

         # NOTE(review): init_params is left at its default here; per the
         # hmmlearn documentation fit() then re-initialises the parameters
         # named in init_params, which would replace the matrices assigned
         # below -- confirm whether init_params='' was intended.
         model = MultinomialHMM(num_states, params="ets")
         model.n_features = num_symbols
         start_prob = np.zeros(num_states)
         start_prob[0] = 1.0  # every sequence starts in the begin state
         print('start prob array')
         print(start_prob)
         model.startprob_ = start_prob
         model.transmat_ = transition_matrix
         model.emissionprob_ = emission_matrix

         try:
             model.fit(concat_dataset, dataset_lengths)
         except ValueError as err:
             # A restart whose fit is rejected is skipped instead of dropping
             # into an interactive debugger and then scoring a broken model.
             print('restart %d: fit failed (%s); skipping' % (restart, err))
             continue

         print('model')
         print(model)
         print('emission probabilities')
         print(model.emissionprob_)
         score = model.score(concat_dataset, dataset_lengths)
         # Keep the first successful restart, then only strict improvements.
         if best_score is None or score > best_score:
             best_score = score
             best_model = model
     return best_model
Exemple #30
0
import numpy as np
from hmmlearn.hmm import MultinomialHMM

from pattern.ge_params import GEParams
from pattern.read_losses_from_csv import read_losses_from_csv, SEQUENCE_COL, RECEIVED_COL

# Hand-picked starting values for a 2-state, 2-symbol MultinomialHMM.
# Start distribution: almost all mass on state 0.
startprob_prior = np.array([0.99, 0.01])

# Transition matrix: both rows are identical, i.e. the next state does not
# depend on the current one under these starting values.
transmat_prior = np.array([[0.95, 0.05], [0.95, 0.05]])

# Emission matrix: state 0 mostly emits symbol 0, state 1 mostly symbol 1.
emissionprob_prior = np.array([[0.9, 0.1], [0.1, 0.9]])

# Single module-level model instance, shared by every fit_ge_params() call.
model = MultinomialHMM(n_components=2, verbose=False, n_iter=1000, tol=1e-3)
model.startprob_ = startprob_prior
model.transmat_ = transmat_prior
model.emissionprob_ = emissionprob_prior
# NOTE(review): per the hmmlearn documentation, init_params lists the
# parameters that fit() initialises itself. With 'st' the start and
# transition values assigned above would be replaced at fit time and only
# the emission matrix kept -- confirm this is the intent (vs. 'e' or '').
model.init_params = 'st'


def fit_ge_params(losses: np.ndarray) -> GEParams:
    """Fit the shared module-level HMM to ``losses`` and wrap it as GEParams.

    The module-level ``model`` is refitted in place, so its learned
    parameters are replaced on every call and the function is not safe to
    call concurrently.

    Args:
        losses: observation array passed straight to ``model.fit``.
            Presumably a column of 0/1 symbols, as hmmlearn expects
            ``(n_samples, 1)`` integer input -- TODO confirm with callers.

    Returns:
        ``GEParams`` built from the fitted model via ``GEParams.from_hmm``.
    """
    model.fit(losses)
    return GEParams.from_hmm(model)


def main(csv_path: str,
         max_length: int,
         use_received: bool = False,
         verbose: bool = False):
    out_dir, csv_name = os.path.split(csv_path)
    expected = None
    try:
            high = high + 1
        elif percent >= .50:
            highMid = highMid + 1
        elif percent >= .25:
            lowMid = lowMid + 1
        else:
            low = low + 1
    matrix[1, 0] = low / len(wins)
    matrix[1, 1] = lowMid / len(wins)
    matrix[1, 2] = highMid / len(wins)
    matrix[1, 3] = high / len(wins)
    return matrix


# Load Data
# The CSV is read as a numeric 2-D array; its first three columns are used.
filename = 'data.csv'
X = np.loadtxt(filename, delimiter=',')

player1 = X[:, 0]
player2 = X[:, 1]
record = X[:, 2]

# Debug output of the hand-estimated matrices (Python 2 print statements).
print "stateProbs(record)", stateProbs(record)
print "eProbs(player1, record", eProbs(player1, record)
# Two-state model whose transition and emission matrices are set from the
# helper functions above; no start distribution is assigned here.
clf = MultinomialHMM(n_components=2)
clf.transmat_ = stateProbs(record)
clf.emissionprob_ = eProbs(player1, record)
print "here"
# NOTE(review): hmmlearn documents fit(X, lengths=None) with X being the
# observation samples. Passing the transition and emission matrices here
# looks wrong -- confirm which observation sequence was meant to be used.
clf.fit(clf.transmat_, clf.emissionprob_)
# NOTE(review): player1 is a 1-D float column from np.loadtxt and the result
# of predict() is discarded -- verify the expected input shape/dtype and
# whether the decoded states should be kept.
clf.predict(player1)
Exemple #32
0
        Convert UMDHMM .hmm and .key files to pickle dumps of
        hmmlearn.MultinomialHMM and LabelEncoder objects.
        """)
# Positional arguments: source model file, source key file, output basename.
args.add_argument("hmm", help="path to source UMDHMM model file")
args.add_argument("key", help="path to source .key file")
args.add_argument("out", help="basename for output files")
# From here on `args` is the parsed namespace, no longer the parser.
args = args.parse_args()

# Parse both source files; the handles are closed as soon as parsing is done.
with open(args.hmm) as f:
    umd = UmdhmmFile(f)
with open(args.key) as f:
    kf = KeyFile(f)

# init_params='' asks hmmlearn not to initialise any parameter itself, so the
# values copied from the UMDHMM file are the ones the model keeps.
mhmm = MultinomialHMM(n_components=umd.n, init_params='')
mhmm.startprob_ = umd.startprob_
mhmm.transmat_ = umd.transmat_
mhmm.emissionprob_ = umd.emissionprob_

# Build the encoder directly from the key file's class list instead of
# fitting it on data.
le = LabelEncoder()
le.classes_ = np.array(kf.classes)

out_pkl = '{0}.pkl'.format(args.out)
out_le = '{0}.le'.format(args.out)

# The model is dumped with joblib, the label encoder with plain pickle.
joblib.dump(mhmm, out_pkl)
with open(out_le, "wb") as f:
    pickle.dump(le, f)

# Report on stderr so stdout stays free for piping.
print("Output written to:\n\t- {0}\n\t- {1}".format(out_pkl, out_le),
      file=sys.stderr)