def __init__(self, base_length=7, init_method=SpecializedHMM.InitMethod.random, training_examples=[], alphabet=get_example_alphabet()):
    '''
    Build the initial HMM parameters (pi, A, B, V) and hand them to the
    parent class constructor.

    The model has ``base_length + 2`` states: a start state (index 0) that
    always emits '@', ``base_length`` inner states, and an end state (last
    index) that always emits '$'.

    base_length       -- number of inner states between start and end state.
    init_method       -- a SpecializedHMM.InitMethod value; stored on self
                         and read by the row-initialisation helpers.
    training_examples -- only used if InitMethod.count_based is used, in
                         which case it must be non-empty.
    alphabet          -- list of emission symbols; '@' and '$' are added
                         around it to form the full symbol set V.

    Raises ValueError if init_method is count_based and no training
    examples were supplied.
    '''
    # NOTE: the default for training_examples is a single list object shared
    # by every call that relies on the default. It is only stored here, never
    # mutated, but callers should not modify self.training_examples in place.
    self.base_length = base_length
    self.init_method = init_method
    self.training_examples = training_examples
    self.alphabet = alphabet

    if (self.init_method == SpecializedHMM.InitMethod.count_based
            and len(self.training_examples) == 0):
        # A string cannot be raised; use a real exception so the message
        # actually reaches the caller.
        raise ValueError("Training examples needs to be provided when init method is count based")

    # ---- State transition matrix A -------------------------------------
    self.number_of_states = base_length + 2
    last_state_index = self.number_of_states - 1

    A = []

    # The start state moves to state 1 with probability 1:
    # [0, 1, 0, 0, ..., 0]
    start_row = zeros(self.number_of_states)
    start_row[1] = 1
    A.append(start_row)

    # Inner states get their rows from the configured init method.
    for i in range(1, last_state_index):
        A.append(self.init_transition_matrix_row(i))

    # The end state can only go back to the start state, with probability 1.
    end_row = zeros(self.number_of_states)
    end_row[0] = 1
    A.append(end_row)

    # ---- Emission probability matrix B ---------------------------------
    # Two extra symbols: '@' (first column) and '$' (last column).
    self.number_of_emissions = len(self.alphabet) + 2

    B = []

    # The start state emits '@' with probability 1.
    start_emissions = zeros(self.number_of_emissions)
    start_emissions[0] = 1
    B.append(start_emissions)

    # Inner states get their rows from the configured init method.
    for i in range(1, last_state_index):
        B.append(self.init_emission_probablity_matrix_row(i))

    # The end state emits '$' with probability 1.
    end_emissions = zeros(self.number_of_emissions)
    end_emissions[self.number_of_emissions - 1] = 1
    B.append(end_emissions)

    # ---- Emission symbols and initial distribution ----------------------
    V = ['@'] + self.alphabet + ['$']

    # The model always starts in the start state.
    pi = zeros(self.number_of_states)
    pi[0] = 1

    super(baseHMM, self).__init__(pi, A, B, V)
def __init__(self, word_length=7, init_method=SpecializedHMM.InitMethod.random, training_examples=[], alphabet=get_example_alphabet(), from_string_string = None):
    '''
    Build the initial HMM parameters (pi, A, B, V) and hand them to the
    parent class constructor.

    The model has ``word_length + 2`` states: a start state (index 0) that
    always emits '@', ``word_length`` inner states, and an end state (last
    index) that always emits '$'.

    word_length        -- number of inner states between start and end state.
    init_method        -- a SpecializedHMM.InitMethod value; stored on self
                          and read by the row-initialisation helpers.
    training_examples  -- only used if InitMethod.count_based is used, in
                          which case it must be non-empty.
    alphabet           -- list of emission symbols; '@' and '$' are added
                          around it to form the full symbol set V.
    from_string_string -- if not None, a string that evaluates to a
                          (pi, A, B, V) tuple; every other argument is then
                          ignored and none of the instance attributes set
                          below (word_length, alphabet, ...) are assigned.

    Raises ValueError if init_method is count_based and no training
    examples were supplied.
    '''
    if from_string_string is not None:
        # SECURITY: eval() executes arbitrary code. Only pass strings that
        # come from a trusted source (e.g. produced by this program itself).
        pi, A, B, V = eval(from_string_string)
        super(WordHMM, self).__init__(pi, A, B, V)
        return

    # NOTE: the default for training_examples is a single list object shared
    # by every call that relies on the default. It is only stored here, never
    # mutated, but callers should not modify self.training_examples in place.
    self.word_length = word_length
    self.init_method = init_method
    self.training_examples = training_examples
    self.alphabet = alphabet

    if (self.init_method == SpecializedHMM.InitMethod.count_based
            and len(self.training_examples) == 0):
        # A string cannot be raised; use a real exception so the message
        # actually reaches the caller.
        raise ValueError("Training examples needs to be provided when init method is count based")

    # ---- State transition matrix A -------------------------------------
    self.number_of_states = word_length + 2
    last_state_index = self.number_of_states - 1

    A = []

    # The start state moves to state 1 with probability 1:
    # [0, 1, 0, 0, ..., 0]
    start_row = zeros(self.number_of_states)
    start_row[1] = 1
    A.append(start_row)

    # Inner states get their rows from the configured init method.
    for i in range(1, last_state_index):
        A.append(self.init_transition_matrix_row(i))

    # The end state can only go back to the start state, with probability 1.
    end_row = zeros(self.number_of_states)
    end_row[0] = 1
    A.append(end_row)

    # ---- Emission probability matrix B ---------------------------------
    # Two extra symbols: '@' (first column) and '$' (last column).
    self.number_of_emissions = len(self.alphabet) + 2

    B = []

    # The start state emits '@' with probability 1.
    start_emissions = zeros(self.number_of_emissions)
    start_emissions[0] = 1
    B.append(start_emissions)

    # Inner states get their rows from the configured init method.
    for i in range(1, last_state_index):
        B.append(self.init_emission_probablity_matrix_row(i))

    # The end state emits '$' with probability 1.
    end_emissions = zeros(self.number_of_emissions)
    end_emissions[self.number_of_emissions - 1] = 1
    B.append(end_emissions)

    # ---- Emission symbols and initial distribution ----------------------
    V = ['@'] + self.alphabet + ['$']

    # The model always starts in the start state.
    pi = zeros(self.number_of_states)
    pi[0] = 1

    super(WordHMM, self).__init__(pi, A, B, V)
def init_transition_matrix_row(self, row_index):
    '''
    Return the initial transition-probability row for state ``row_index``.

    The row is built according to self.init_method:

    * InitMethod.random      -- delegated to zeros_and_random_with_sum1,
                                called with the number of states and
                                ``number_of_states - row_index``
                                (presumably leading zeros followed by random
                                values summing to 1 -- confirm in the helper).
    * InitMethod.count_based -- ``row_index`` zeros, followed by the values
                                from list_with_sum_and_equal_elements
                                (requested with a total of 0.2), with 0.8
                                inserted at index ``row_index + 1``, i.e. the
                                transition to the next state.

    Raises ValueError for any other init method.
    '''
    method = self.init_method

    if method == SpecializedHMM.InitMethod.random:
        return zeros_and_random_with_sum1(self.number_of_states,
                                          self.number_of_states - row_index)

    if method == SpecializedHMM.InitMethod.count_based:
        # One slot fewer than needed is generated here; the missing slot is
        # the 0.8 entry inserted below, which brings the row to full length.
        row = (zeros(row_index) +
               list_with_sum_and_equal_elements(self.number_of_states - row_index - 1, 0.2))
        row.insert(row_index + 1, 0.8)
        return row

    # A string cannot be raised; use a real exception so the message
    # actually reaches the caller.
    raise ValueError("Init Method Not Supported")
def count_position(position):
    '''
    Count how often each alphabet symbol occurs at ``position`` across
    self.training_examples, starting from a pseudocount baseline.

    Relies on names from the surrounding scope: self, alphabet,
    alphabet_size and nr_of_training_examples.

    Returns a list-like of length alphabet_size whose i-th entry is the
    baseline for symbol i plus the number of training examples that have
    alphabet[i] at ``position``. Examples shorter than ``position + 1`` are
    ignored.
    '''
    # Hard-wired switches selecting the pseudocount baseline.
    use_pseudocount = True
    uniform_pseudocount = False

    if not use_pseudocount:
        # No smoothing: start every symbol at zero.
        count_list = zeros(alphabet_size)
    elif uniform_pseudocount:
        # Spread 10% of the number of training examples evenly over symbols.
        count_list = zeros(alphabet_size)
        for slot in range(0, alphabet_size):
            count_list[slot] = (nr_of_training_examples*0.1)/alphabet_size
    else:
        # Baseline from random_list_with_sum (presumably random values whose
        # total is 10% of the number of training examples -- confirm there).
        count_list = random_list_with_sum(alphabet_size, nr_of_training_examples*0.1)

    # Tally the observed symbol at this position for every long-enough example.
    for example in self.training_examples:
        if position >= len(example):
            continue
        symbol_index = alphabet.index(example[position])
        count_list[symbol_index] += 1

    return count_list