Exemple #1
0
    def train(self):
        """Train every initial HMM and write the trained model to disk.

        For each path returned by ``self.get_initial_hmm_files()``:

        * the character code is parsed from the file name (the part of the
          base name before the first dot);
        * the HMM is loaded with ``MultivariateHmm.from_file``;
        * the sequence set ``<char_code>.sset`` is loaded from
          ``self.TRAIN_FEATURES_ROOT`` via ``self.get_sequence_set``;
        * depending on ``self.TRAINING``, the HMM is handed to the Viterbi
          trainer, to the trainer built by ``self.Trainer()``, or to both
          (Viterbi first);
        * the HMM is written to ``<self.TRAIN_HMM_ROOT>/<char_code>.xml``.

        ``self.TRAIN_HMM_ROOT`` is created if it does not exist yet.

        Raises:
            ModelException: if no initial HMM files are found.
        """
        initial_hmm_files = self.get_initial_hmm_files()

        if not initial_hmm_files:
            # Call form of raise: valid on both Python 2 and Python 3.
            raise ModelException("No initial HMM files found.")

        if not os.path.exists(self.TRAIN_HMM_ROOT):
            os.makedirs(self.TRAIN_HMM_ROOT)

        trainer = self.Trainer()
        viterbi_trainer = ViterbiTrainer(self.ViterbiCalculator(),
                                         non_diagonal=self.NON_DIAGONAL)

        # The training mode does not change between files, so decide once.
        use_viterbi = self.TRAINING in (self.TRAINING_VITERBI,
                                        self.TRAINING_BOTH)
        use_baum_welch = self.TRAINING in (self.TRAINING_BAUM_WELCH,
                                           self.TRAINING_BOTH)

        for hmm_file in initial_hmm_files:
            char_code = int(os.path.basename(hmm_file).split(".")[0])
            hmm = MultivariateHmm.from_file(hmm_file)
            sset_file = os.path.join(self.TRAIN_FEATURES_ROOT,
                                     str(char_code) + ".sset")

            sset = self.get_sequence_set(sset_file)
            output_file = os.path.join(self.TRAIN_HMM_ROOT,
                                       "%d.xml" % char_code)

            # The trainers' return values are ignored; presumably they
            # update ``hmm`` in place -- confirm against the trainer classes.
            if use_viterbi:
                self.print_verbose("Viterbi training: " + output_file)
                viterbi_trainer.train(hmm, sset)

            if use_baum_welch:
                self.print_verbose("Baum-Welch training: " + output_file)
                trainer.train(hmm, sset)

            hmm.write(output_file)
Exemple #2
0
    def init(self):
        """Build an initial HMM for every training feature file.

        Calls ``self.load_char_dicts()`` first.  Then, for each path returned
        by ``self.get_train_feature_files()``, the character code is parsed
        from the file name (base name without its extension), the sequence
        set is loaded with ``self.get_sequence_set`` and tagged with that
        ``char_code``, an HMM is obtained from ``self.get_initial_hmm`` and
        written to ``<self.INIT_HMM_ROOT>/<char_code>.xml``.

        ``self.INIT_HMM_ROOT`` is created if it does not exist yet.

        Raises:
            ModelException: if no feature files are found.
        """
        self.load_char_dicts()

        feature_files = self.get_train_feature_files()

        if not feature_files:
            # Call form of raise: valid on both Python 2 and Python 3.
            raise ModelException("No feature files found.")

        if not os.path.exists(self.INIT_HMM_ROOT):
            os.makedirs(self.INIT_HMM_ROOT)

        for sset_file in feature_files:
            # Strip directory and extension instead of slicing off a fixed
            # number of characters; same result for "<char_code>.sset".
            stem = os.path.splitext(os.path.basename(sset_file))[0]
            char_code = int(stem)

            sset = self.get_sequence_set(sset_file)
            sset.char_code = char_code

            hmm = self.get_initial_hmm(sset)

            output_file = os.path.join(self.INIT_HMM_ROOT,
                                       "%d.xml" % char_code)

            self.print_verbose(output_file)

            hmm.write(output_file)
Exemple #3
0
    def train(self):
        """Train every initial HMM and write the trained model to disk.

        For each path returned by ``self.get_initial_hmm_files()``:

        * the character code is parsed from the file name (the part of the
          base name before the first dot);
        * the HMM is loaded with ``MultivariateHmm.from_file``;
        * the sequence set ``<char_code>.sset`` is loaded from
          ``self.TRAIN_FEATURES_ROOT`` via ``self.get_sequence_set``;
        * depending on ``self.TRAINING``, the HMM is handed to the Viterbi
          trainer, to the trainer built by ``self.Trainer()``, or to both
          (Viterbi first);
        * the HMM is written to ``<self.TRAIN_HMM_ROOT>/<char_code>.xml``.

        ``self.TRAIN_HMM_ROOT`` is created if it does not exist yet.

        Raises:
            ModelException: if no initial HMM files are found.
        """
        initial_hmm_files = self.get_initial_hmm_files()

        if not initial_hmm_files:
            # Call form of raise: valid on both Python 2 and Python 3.
            raise ModelException("No initial HMM files found.")

        if not os.path.exists(self.TRAIN_HMM_ROOT):
            os.makedirs(self.TRAIN_HMM_ROOT)

        trainer = self.Trainer()
        viterbi_trainer = ViterbiTrainer(self.ViterbiCalculator(),
                                         non_diagonal=self.NON_DIAGONAL)

        # The training mode does not change between files, so decide once.
        use_viterbi = self.TRAINING in (self.TRAINING_VITERBI,
                                        self.TRAINING_BOTH)
        use_baum_welch = self.TRAINING in (self.TRAINING_BAUM_WELCH,
                                           self.TRAINING_BOTH)

        for hmm_file in initial_hmm_files:
            char_code = int(os.path.basename(hmm_file).split(".")[0])
            hmm = MultivariateHmm.from_file(hmm_file)
            sset_file = os.path.join(self.TRAIN_FEATURES_ROOT,
                                     str(char_code) + ".sset")

            sset = self.get_sequence_set(sset_file)
            output_file = os.path.join(self.TRAIN_HMM_ROOT,
                                       "%d.xml" % char_code)

            # The trainers' return values are ignored; presumably they
            # update ``hmm`` in place -- confirm against the trainer classes.
            if use_viterbi:
                self.print_verbose("Viterbi training: " + output_file)
                viterbi_trainer.train(hmm, sset)

            if use_baum_welch:
                self.print_verbose("Baum-Welch training: " + output_file)
                trainer.train(hmm, sset)

            hmm.write(output_file)
Exemple #4
0
    def init(self):
        """Build an initial HMM for every training feature file.

        Calls ``self.load_char_dicts()`` first.  Then, for each path returned
        by ``self.get_train_feature_files()``, the character code is parsed
        from the file name (base name without its extension), the sequence
        set is loaded with ``self.get_sequence_set`` and tagged with that
        ``char_code``, an HMM is obtained from ``self.get_initial_hmm`` and
        written to ``<self.INIT_HMM_ROOT>/<char_code>.xml``.

        ``self.INIT_HMM_ROOT`` is created if it does not exist yet.

        Raises:
            ModelException: if no feature files are found.
        """
        self.load_char_dicts()

        feature_files = self.get_train_feature_files()

        if not feature_files:
            # Call form of raise: valid on both Python 2 and Python 3.
            raise ModelException("No feature files found.")

        if not os.path.exists(self.INIT_HMM_ROOT):
            os.makedirs(self.INIT_HMM_ROOT)

        for sset_file in feature_files:
            # Strip directory and extension instead of slicing off a fixed
            # number of characters; same result for "<char_code>.sset".
            stem = os.path.splitext(os.path.basename(sset_file))[0]
            char_code = int(stem)

            sset = self.get_sequence_set(sset_file)
            sset.char_code = char_code

            hmm = self.get_initial_hmm(sset)

            output_file = os.path.join(self.INIT_HMM_ROOT,
                                       "%d.xml" % char_code)

            self.print_verbose(output_file)

            hmm.write(output_file)
Exemple #5
0
    def init(self):
        """Build initial HMMs using the average observation count per character.

        Calls ``self.load_char_dicts()`` first.  All sequence sets returned
        by ``self.get_train_feature_files()`` are loaded and tagged with the
        character code parsed from their file name.  The pair returned by
        ``self.get_n_observations`` is accumulated over all sets to compute
        the overall observations-per-character average, which is then passed
        to ``self.get_initial_hmm`` together with each sequence set.  Every
        resulting HMM is written to ``<self.INIT_HMM_ROOT>/<char_code>.xml``.

        ``self.INIT_HMM_ROOT`` is created if it does not exist yet.

        Raises:
            ModelException: if no feature files are found, or if the
                accumulated character count is zero (the average would be
                undefined).
        """
        self.load_char_dicts()

        feature_files = self.get_train_feature_files()

        if not feature_files:
            # Call form of raise: valid on both Python 2 and Python 3.
            raise ModelException("No feature files found.")

        if not os.path.exists(self.INIT_HMM_ROOT):
            os.makedirs(self.INIT_HMM_ROOT)

        ssets = []

        # First pass: load every sequence set and accumulate the totals
        # needed for the average number of observations per character.
        n_observations = 0
        n_characters = 0

        for sset_file in feature_files:
            # Strip directory and extension instead of slicing off a fixed
            # number of characters; same result for "<char_code>.sset".
            stem = os.path.splitext(os.path.basename(sset_file))[0]

            sset = self.get_sequence_set(sset_file)
            sset.char_code = int(stem)
            ssets.append(sset)

            obs, chars = self.get_n_observations(sset)
            n_observations += obs
            n_characters += chars

        if n_characters == 0:
            # Avoid an opaque ZeroDivisionError when the feature files
            # contribute no characters at all.
            raise ModelException("No observations found in feature files.")

        # float() keeps true division under Python 2 integer semantics.
        avg_n_obs_per_char = float(n_observations) / n_characters

        # Second pass: the average is only known once all sets are loaded.
        for sset in ssets:
            hmm = self.get_initial_hmm(sset, avg_n_obs_per_char)

            output_file = os.path.join(self.INIT_HMM_ROOT,
                                       "%d.xml" % sset.char_code)

            hmm.write(output_file)
            
Exemple #6
0
    def init(self):
        """Build initial HMMs using the average observation count per character.

        Calls ``self.load_char_dicts()`` first.  All sequence sets returned
        by ``self.get_train_feature_files()`` are loaded and tagged with the
        character code parsed from their file name.  The pair returned by
        ``self.get_n_observations`` is accumulated over all sets to compute
        the overall observations-per-character average, which is then passed
        to ``self.get_initial_hmm`` together with each sequence set.  Every
        resulting HMM is written to ``<self.INIT_HMM_ROOT>/<char_code>.xml``.

        ``self.INIT_HMM_ROOT`` is created if it does not exist yet.

        Raises:
            ModelException: if no feature files are found, or if the
                accumulated character count is zero (the average would be
                undefined).
        """
        self.load_char_dicts()

        feature_files = self.get_train_feature_files()

        if not feature_files:
            # Call form of raise: valid on both Python 2 and Python 3.
            raise ModelException("No feature files found.")

        if not os.path.exists(self.INIT_HMM_ROOT):
            os.makedirs(self.INIT_HMM_ROOT)

        ssets = []

        # First pass: load every sequence set and accumulate the totals
        # needed for the average number of observations per character.
        n_observations = 0
        n_characters = 0

        for sset_file in feature_files:
            # Strip directory and extension instead of slicing off a fixed
            # number of characters; same result for "<char_code>.sset".
            stem = os.path.splitext(os.path.basename(sset_file))[0]

            sset = self.get_sequence_set(sset_file)
            sset.char_code = int(stem)
            ssets.append(sset)

            obs, chars = self.get_n_observations(sset)
            n_observations += obs
            n_characters += chars

        if n_characters == 0:
            # Avoid an opaque ZeroDivisionError when the feature files
            # contribute no characters at all.
            raise ModelException("No observations found in feature files.")

        # float() keeps true division under Python 2 integer semantics.
        avg_n_obs_per_char = float(n_observations) / n_characters

        # Second pass: the average is only known once all sets are loaded.
        for sset in ssets:
            hmm = self.get_initial_hmm(sset, avg_n_obs_per_char)

            output_file = os.path.join(self.INIT_HMM_ROOT,
                                       "%d.xml" % sset.char_code)

            hmm.write(output_file)