Example #1
0
    def train(self):
        """Train one HMM per initial model file and write the results.

        Each path returned by ``self.get_initial_hmm_files()`` is expected
        to have a base name starting with an integer character code followed
        by a dot. The model is loaded from that path, trained on the
        sequence set ``<char_code>.sset`` under ``self.TRAIN_FEATURES_ROOT``
        and written to ``<char_code>.xml`` under ``self.TRAIN_HMM_ROOT``
        (the directory is created when it does not exist).

        ``self.TRAINING`` selects Viterbi training, Baum-Welch training, or
        both; when both are selected, Viterbi training runs first.

        Raises:
            ModelException: if no initial HMM files are found.
            ValueError: if a file's base name does not start with an
                integer character code.
        """
        initial_hmm_files = self.get_initial_hmm_files()

        if not initial_hmm_files:
            # Call form of raise: valid on both Python 2 and Python 3.
            raise ModelException("No initial HMM files found.")

        if not os.path.exists(self.TRAIN_HMM_ROOT):
            os.makedirs(self.TRAIN_HMM_ROOT)

        trainer = self.Trainer()
        viterbi_trainer = ViterbiTrainer(self.ViterbiCalculator(),
                                         non_diagonal=self.NON_DIAGONAL)

        # The training mode does not change while iterating; decide once.
        use_viterbi = self.TRAINING in (self.TRAINING_VITERBI,
                                        self.TRAINING_BOTH)
        use_baum_welch = self.TRAINING in (self.TRAINING_BAUM_WELCH,
                                           self.TRAINING_BOTH)

        for hmm_file in initial_hmm_files:
            # The character code is the part of the name before the first dot.
            char_code = int(os.path.basename(hmm_file).split(".")[0])
            hmm = MultivariateHmm.from_file(hmm_file)
            sset_file = os.path.join(self.TRAIN_FEATURES_ROOT,
                                     str(char_code) + ".sset")

            sset = self.get_sequence_set(sset_file)
            output_file = os.path.join(self.TRAIN_HMM_ROOT,
                                       "%d.xml" % char_code)

            if use_viterbi:
                self.print_verbose("Viterbi training: " + output_file)
                viterbi_trainer.train(hmm, sset)

            if use_baum_welch:
                self.print_verbose("Baum-Welch training: " + output_file)
                trainer.train(hmm, sset)

            hmm.write(output_file)
Example #2
0
    def train(self):
        """Train one HMM per initial model file and write the results.

        Each path returned by ``self.get_initial_hmm_files()`` is expected
        to have a base name starting with an integer character code followed
        by a dot. The model is loaded from that path, trained on the
        sequence set ``<char_code>.sset`` under ``self.TRAIN_FEATURES_ROOT``
        and written to ``<char_code>.xml`` under ``self.TRAIN_HMM_ROOT``
        (the directory is created when it does not exist).

        ``self.TRAINING`` selects Viterbi training, Baum-Welch training, or
        both; when both are selected, Viterbi training runs first.

        Raises:
            ModelException: if no initial HMM files are found.
            ValueError: if a file's base name does not start with an
                integer character code.
        """
        initial_hmm_files = self.get_initial_hmm_files()

        if not initial_hmm_files:
            # Call form of raise: valid on both Python 2 and Python 3.
            raise ModelException("No initial HMM files found.")

        if not os.path.exists(self.TRAIN_HMM_ROOT):
            os.makedirs(self.TRAIN_HMM_ROOT)

        trainer = self.Trainer()
        viterbi_trainer = ViterbiTrainer(self.ViterbiCalculator(),
                                         non_diagonal=self.NON_DIAGONAL)

        # The training mode does not change while iterating; decide once.
        use_viterbi = self.TRAINING in (self.TRAINING_VITERBI,
                                        self.TRAINING_BOTH)
        use_baum_welch = self.TRAINING in (self.TRAINING_BAUM_WELCH,
                                           self.TRAINING_BOTH)

        for hmm_file in initial_hmm_files:
            # The character code is the part of the name before the first dot.
            char_code = int(os.path.basename(hmm_file).split(".")[0])
            hmm = MultivariateHmm.from_file(hmm_file)
            sset_file = os.path.join(self.TRAIN_FEATURES_ROOT,
                                     str(char_code) + ".sset")

            sset = self.get_sequence_set(sset_file)
            output_file = os.path.join(self.TRAIN_HMM_ROOT,
                                       "%d.xml" % char_code)

            if use_viterbi:
                self.print_verbose("Viterbi training: " + output_file)
                viterbi_trainer.train(hmm, sset)

            if use_baum_welch:
                self.print_verbose("Baum-Welch training: " + output_file)
                trainer.train(hmm, sset)

            hmm.write(output_file)
Example #3
0
 def get_hmms_from_files(self, files):
     """Load an HMM from every path in *files*, tagged with its char code.

     The character code is the integer found before the first dot of the
     file's base name; it is stored on the loaded model as ``char_code``.
     Returns the models as a list, in the order of *files*.
     """
     def load(path):
         # Parse the code before loading so a bad name fails first.
         code = int(os.path.basename(path).split(".")[0])
         model = MultivariateHmm.from_file(path)
         model.char_code = code
         return model

     return [load(path) for path in files]
Example #4
0
    def get_hmms_from_files(self, files):
        """Return the HMMs stored in *files*, each tagged with ``char_code``.

        The code assigned to a model is the integer that precedes the first
        dot in the base name of the file it was loaded from. The returned
        list follows the order of *files*.
        """
        loaded = []

        for path in files:
            base_name = os.path.basename(path)
            code = int(base_name.split(".")[0])

            model = MultivariateHmm.from_file(path)
            model.char_code = code
            loaded.append(model)

        return loaded
Example #5
0
    def get_initial_hmm(self, sset):
        """Build the initial HMM for the character described by *sset*.

        The number of states is the stroke count reported by
        ``self.get_n_strokes`` for ``sset.char_code`` multiplied by
        ``self.N_STATES_PER_STROKE``. Initial state probabilities, the
        transition matrix and the emission matrix are obtained from the
        corresponding ``self.get_*`` helpers for that number of states.
        """
        stroke_count = self.get_n_strokes(sset.char_code)
        state_count = stroke_count * self.N_STATES_PER_STROKE

        initial_probs = self.get_initial_state_probabilities(state_count)
        transitions = self.get_state_transition_matrix(state_count)
        emissions = self.get_emission_matrix(state_count, sset)

        return MultivariateHmm(transitions, emissions, initial_probs)
Example #6
0
    def get_initial_hmm(self, sset, avg_n_obs_per_char):
        """Build the initial HMM for *sset*, sizing it by observation count.

        The mean number of observations per character in *sset* is divided
        by *avg_n_obs_per_char*, scaled by ``self.AVERAGE_N_STATES`` and
        rounded to give the number of states. The chosen size is reported
        through ``self.print_verbose`` before the model is assembled from
        the ``self.get_*`` helpers.
        """
        total_obs, total_chars = self.get_n_observations(sset)

        # float() keeps the mean from being truncated by integer division.
        mean_obs = float(total_obs) / total_chars
        relative_length = mean_obs / avg_n_obs_per_char
        state_count = int(round(relative_length * self.AVERAGE_N_STATES))

        char_label = self.get_utf8_from_char_code(sset.char_code)
        message = "%s (%d): %d" % (char_label, sset.char_code, state_count)
        self.print_verbose(message)

        initial_probs = self.get_initial_state_probabilities(state_count)
        transitions = self.get_state_transition_matrix(state_count)
        emissions = self.get_emission_matrix(state_count, sset)

        return MultivariateHmm(transitions, emissions, initial_probs)