Beispiel #1
0
 def init_emission_probablity_matrix_row(self, row_index):
     """Build the initial emission-probability row for ``row_index``.

     The returned row always starts and ends with a 0 entry. What goes in
     between depends on ``self.init_method``:

     * ``InitMethod.random``: the ``number_of_emissions - 2`` values returned
       by ``random_list_with_sum(number_of_emissions - 2, 1)``.
     * ``InitMethod.count_based``: one value per alphabet symbol, namely how
       often the symbol occurs at position ``row_index - 1`` of the training
       examples (added to a pseudocount), divided by the sum of all counts.

     Returns:
         A list of the form ``[0] + <inner values> + [0]``.

     Raises:
         ValueError: if ``self.init_method`` is neither of the methods above.

     NOTE(review): a zero count total (e.g. no training examples) is not
     guarded against and ends in a division by zero.
     """
     if self.init_method == SpecializedHMM.InitMethod.random:
         # random_list_with_sum is defined elsewhere; judging by its name it
         # yields values summing to 1 -- TODO confirm.
         return [0] + random_list_with_sum(self.number_of_emissions-2, 1) + [0]

     if self.init_method == SpecializedHMM.InitMethod.count_based:
         nr_of_training_examples = len(self.training_examples)
         alphabet = self.alphabet
         alphabet_size = len(alphabet)

         def count_position(position):
             """Count, per alphabet symbol, its occurrences at ``position``."""
             # Pseudocount: start from the values handed out by
             # random_list_with_sum (total argument: 10% of the number of
             # training examples) instead of from zero.
             count_list = random_list_with_sum(alphabet_size,
                                               nr_of_training_examples*0.1)
             # Do the counting; examples too short to have a symbol at
             # this position are skipped.
             for e in self.training_examples:
                 if position < len(e):
                     character_index = alphabet.index(e[position])
                     count_list[character_index] += 1
             return count_list

         count_list = count_position(row_index-1)
         total_count = sum(count_list)
         # A list comprehension (not map()) guarantees a real list, so the
         # concatenation below also works where map() returns an iterator.
         row = [element / total_count for element in count_list]
         return [0] + row + [0]

     # Raising a bare string is itself a TypeError; raise a real exception
     # carrying the same message instead.
     raise ValueError("Init Method Not Supported")
Beispiel #2
0
 def count_position(position):
     """Tally, per alphabet symbol, how often it sits at ``position``.

     ``alphabet``, ``alphabet_size``, ``nr_of_training_examples`` and ``self``
     are not defined here; they are taken from the enclosing scope.

     Returns the list obtained from ``random_list_with_sum`` (the pseudocount)
     after adding 1 to the matching slot for every training example that is
     long enough to have a symbol at ``position``.
     """
     # Pseudocount: the tallies do not start at zero but at the values
     # supplied by random_list_with_sum (defined elsewhere).
     tallies = random_list_with_sum(alphabet_size,
                                    nr_of_training_examples*0.1)
     for example in self.training_examples:
         if len(example) <= position:
             # This example has no symbol at the requested position.
             continue
         slot = alphabet.index(example[position])
         tallies[slot] += 1
     return tallies
Beispiel #3
0
    def init_emission_probablity_matrix_row(self, row_index):
        """Build the initial emission-probability row for ``row_index``.

        The returned row always starts and ends with a 0 entry. What goes in
        between depends on ``self.init_method``:

        * ``InitMethod.random``: the ``number_of_emissions - 2`` values
          returned by ``random_list_with_sum(number_of_emissions - 2, 1)``.
        * ``InitMethod.count_based``: one value per alphabet symbol, namely
          how often the symbol occurs at position ``row_index - 1`` of the
          training examples (added to an optional pseudocount), divided by
          the sum of all counts.

        Returns:
            A list of the form ``[0] + <inner values> + [0]``.

        Raises:
            ValueError: if ``self.init_method`` is neither of the methods
                above.

        NOTE(review): a zero count total (e.g. no training examples) is not
        guarded against and ends in a division by zero.
        """
        if self.init_method == SpecializedHMM.InitMethod.random:
            # random_list_with_sum is defined elsewhere; judging by its name
            # it yields values summing to 1 -- TODO confirm.
            return [0] + random_list_with_sum(self.number_of_emissions - 2,
                                              1) + [0]

        if self.init_method == SpecializedHMM.InitMethod.count_based:
            nr_of_training_examples = len(self.training_examples)
            alphabet = self.alphabet
            alphabet_size = len(alphabet)

            def count_position(position):
                """Count, per alphabet symbol, its occurrences at ``position``."""
                # Hard-coded switches for the pseudocount. With the values
                # below only the random_list_with_sum branch is taken.
                use_pseudocount = True
                uniform_pseudocount = False
                if use_pseudocount:
                    if uniform_pseudocount:
                        # Spread 10% of the number of examples evenly.
                        # zeros() is defined elsewhere -- presumably a
                        # zero-filled sequence of that length; confirm.
                        count_list = zeros(alphabet_size)
                        for i in range(alphabet_size):
                            count_list[i] = (nr_of_training_examples *
                                             0.1) / alphabet_size
                    else:
                        count_list = random_list_with_sum(
                            alphabet_size, nr_of_training_examples * 0.1)
                else:
                    count_list = zeros(alphabet_size)
                # Do the counting; examples too short to have a symbol at
                # this position are skipped.
                for e in self.training_examples:
                    if position < len(e):
                        character_index = alphabet.index(e[position])
                        count_list[character_index] += 1
                return count_list

            count_list = count_position(row_index - 1)
            total_count = sum(count_list)
            # A list comprehension (not map()) guarantees a real list, so the
            # concatenation below also works where map() returns an iterator.
            row = [element / total_count for element in count_list]
            return [0] + row + [0]

        # Raising a bare string is itself a TypeError; raise a real exception
        # carrying the same message instead.
        raise ValueError("Init Method Not Supported")
Beispiel #4
0
 def count_position(position):
     """Tally, per alphabet symbol, how often it sits at ``position``.

     ``alphabet``, ``alphabet_size``, ``nr_of_training_examples`` and ``self``
     are not defined here; they are taken from the enclosing scope.

     The tallies start from a pseudocount chosen by two hard-coded switches
     and then gain 1 in the matching slot for every training example that is
     long enough to have a symbol at ``position``.
     """
     # Pseudocount switches; with these values only the last branch below
     # (random_list_with_sum) is ever taken.
     use_pseudocount = True
     uniform_pseudocount = False

     if not use_pseudocount:
         # No pseudocount: start from all zeros.
         tallies = zeros(alphabet_size)
     elif uniform_pseudocount:
         # Same share of 10% of the number of examples in every slot.
         tallies = zeros(alphabet_size)
         for slot in range(alphabet_size):
             tallies[slot] = (nr_of_training_examples*0.1)/alphabet_size
     else:
         tallies = random_list_with_sum(alphabet_size,
                                        nr_of_training_examples*0.1)

     # Do the counting.
     for example in self.training_examples:
         if len(example) <= position:
             # This example has no symbol at the requested position.
             continue
         symbol_slot = alphabet.index(example[position])
         tallies[symbol_slot] += 1
     return tallies
Beispiel #5
0
 def count_position(position):
     """Return per-symbol occurrence counts for one sequence position.

     Relies on ``alphabet``, ``alphabet_size``, ``nr_of_training_examples``
     and ``self`` from the enclosing scope. The result is the pseudocount
     container with 1 added to a symbol's slot each time that symbol is found
     at ``position`` in a training example; shorter examples are ignored.
     """
     # Hard-coded pseudocount configuration. As set here, the counts are
     # seeded by random_list_with_sum.
     use_pseudocount = True
     uniform_pseudocount = False

     if use_pseudocount and not uniform_pseudocount:
         seeded = random_list_with_sum(alphabet_size,
                                       nr_of_training_examples*0.1)
     else:
         seeded = zeros(alphabet_size)
         if use_pseudocount:
             # Uniform variant: every slot gets the same share of 10% of
             # the number of training examples.
             for idx in range(alphabet_size):
                 seeded[idx] = (nr_of_training_examples*0.1)/alphabet_size

     # Do the counting.
     long_enough = (ex for ex in self.training_examples if position < len(ex))
     for ex in long_enough:
         hit = alphabet.index(ex[position])
         seeded[hit] += 1
     return seeded