Exemple #1
0
    def initialize_new(self, scp_list, word_mlf, dict, remove_previous=False):
        """Prepare a fresh set of training files for this model.

        The method points the log directory at ``self.name``, recreates
        ``self.train_files_dir``, writes the merged dictionary to
        ``self.training_dict``, writes the phone list to
        ``<self._get_model_name_id()>.hmmlist``, and writes a filtered scp
        list (``self.training_scp``) and word MLF (``self.training_word_mlf``)
        containing only utterances whose words are all in the dictionary.
        It finishes by calling ``self.expand_word_transcription()``.

        Args:
            scp_list: Comma-separated string of scp file paths.
            word_mlf: Comma-separated string of word-level MLF file paths.
            dict: A dictionary file path, or an iterable of such paths.
                (The name shadows the builtin but is part of the interface.)
            remove_previous: When true, existing log files, training files
                and ``<model_dir>/<name>.*`` files are deleted first; when
                false, their presence is an error.

        Raises:
            ExistingFilesException: Previous files exist and
                ``remove_previous`` is false.
            TypeError: ``dict`` is neither a string nor an iterable of strings.
        """
        System.set_log_dir(self.name)
        if remove_previous:
            for f in glob.iglob(System.get_log_dir() + '/*'):
                os.remove(f)

        model_files_pattern = self.model_dir + '/' + self.name + '.*'

        if not remove_previous and (
                os.path.exists(self.train_files_dir) or
                len(glob.glob(model_files_pattern)) > 0):
            raise ExistingFilesException

        # Start from a clean slate: drop old training files and model files.
        if os.path.exists(self.train_files_dir):
            shutil.rmtree(self.train_files_dir)
        for f in glob.iglob(model_files_pattern):
            os.remove(f)
        os.mkdir(self.train_files_dir)

        # handle dictionary
        dic = HTK_dictionary()
        if isinstance(dict, basestring):
            dic.read_dict(dict)
        elif all(isinstance(d, basestring) for d in dict):
            for d in dict:
                dic.read_dict(d)
        else:
            raise TypeError
        dic.write_dict(self.training_dict)

        self.phones = dic.get_phones()

        # handle transcription
        trans = HTK_transcription()
        for mlf_path in word_mlf.strip().split(','):
            trans.read_mlf(mlf_path, HTK_transcription.WORD)

        self.id = 1

        phones_list = self._get_model_name_id() + '.hmmlist'
        with open(phones_list, 'w') as phones_desc:
            for p in self.phones:
                print(p, file=phones_desc)

        # handle scp files
        scp_list = scp_list.strip().split(',')

        real_trans = HTK_transcription()
        real_trans.transcriptions[real_trans.WORD] = {}

        with open(self.training_scp, 'w') as scp_desc:
            for scp in scp_list:
                scp_dir = os.path.dirname(scp)
                # Context manager so each input scp is closed as soon as it
                # has been read, instead of lingering until garbage collection.
                with open(scp) as scp_file:
                    for line in scp_file:
                        path = line.strip()
                        utt_id = os.path.splitext(os.path.basename(path))[0]
                        # Relative entries are resolved against the directory
                        # of the scp file that lists them.
                        if not line.startswith('/'):
                            path = os.path.join(scp_dir, path)

                        # NOTE(review): an utterance id with no entry in the
                        # word transcription is not handled here and will
                        # propagate the lookup error -- confirm this is
                        # intended.
                        words = trans.transcriptions[
                            HTK_transcription.WORD][utt_id]

                        for word in words:
                            if not dic.word_in_dict(word):
                                print("%s skipped, because has missing word %s" %
                                      (path, word))
                                break
                        else:
                            # Every word is known: keep the utterance.
                            print(path, file=scp_desc)
                            real_trans.transcriptions[real_trans.WORD][
                                utt_id] = words

        real_trans.write_mlf(self.training_word_mlf,
                             target=HTK_transcription.WORD)
        self.expand_word_transcription()
    def __init__(self, htk_config, name, model, scp, dictionary,
                 language_model):
        """Create the working directory and stage scp lists and dictionaries.

        A directory ``name`` is (re)created -- any existing directory at that
        path is removed -- together with ``xforms0`` and ``classes0``
        sub-directories. The scp list(s) are copied into it with every entry
        prefixed by the directory of the scp file it came from, and the two
        dictionaries are re-written into it as ``dict.hdecode`` and
        ``dict.hvite``.

        Args:
            htk_config: Configuration object; ``num_speaker_chars`` and
                ``adap_align_dict`` are read from it. A negative
                ``num_speaker_chars`` is replaced by 3 on the passed object.
            name: Working directory; a path not starting with ``/`` is taken
                relative to the current working directory.
            model: Model path, stored as ``self.model``. In wildcard mode the
                run of ``?`` is substituted per speaker.
            scp: Path of an scp list. If it contains ``?``, it is treated as
                a glob pattern whose longest run of ``?`` marks the speaker
                id, and one ``<speaker>_list.scp`` is written per match.
            dictionary: Path of the dictionary read via ``HTK_dictionary``.
            language_model: Stored unchanged as ``self.language_model``.
        """
        if not name.startswith('/'):
            name = os.path.join(os.getcwd(), name)

        if htk_config.num_speaker_chars < 0:
            htk_config.num_speaker_chars = 3

        self.name = name
        if os.path.exists(name):
            shutil.rmtree(name, ignore_errors=True)
        os.mkdir(name)

        self.a_id = 0
        self.xforms_dir = os.path.join(name, 'xforms%d' % self.a_id)
        os.mkdir(self.xforms_dir)

        self.classes_dir = os.path.join(name, 'classes%d' % self.a_id)
        os.mkdir(self.classes_dir)

        self.model = model

        self.split_scp_models = []

        if '?' in scp:
            self.scp = None
            # Length of the longest run of '?' in the pattern.
            num_scp_speaker_chars = 1
            while '?' * (num_scp_speaker_chars + 1) in scp:
                num_scp_speaker_chars += 1
            wildcard = '?' * num_scp_speaker_chars
            s_index = scp.find(wildcard)

            # Materialise the matches before any file is written.
            speakers = [
                s[s_index:s_index + num_scp_speaker_chars]
                for s in glob.iglob(scp)
            ]

            for s in speakers:
                speaker_scp = scp.replace(wildcard, s)
                # Resolve entries against the concrete per-speaker file's
                # directory, so a wildcard in the directory part of `scp`
                # does not leak '?' into the written paths.
                speaker_dir = os.path.dirname(speaker_scp)
                real_scp = os.path.join(name, '%s_list.scp' % s)
                with open(real_scp, 'w') as scp_desc:
                    # Close the source list deterministically.
                    with open(speaker_scp) as scp_src:
                        for line in scp_src:
                            print(os.path.join(speaker_dir, line.strip()),
                                  file=scp_desc)
                self.split_scp_models.append(
                    (s, real_scp, model.replace(wildcard, s)))
        else:
            scp_dir = os.path.dirname(scp)
            self.scp = os.path.join(name, 'list.scp')
            with open(self.scp, 'w') as scp_desc:
                with open(scp) as scp_src:
                    for line in scp_src:
                        print(os.path.join(scp_dir, line.strip()),
                              file=scp_desc)

        # Re-write the decoding dictionary into the working directory.
        d = HTK_dictionary()
        d.read_dict(dictionary)

        self.dict = os.path.join(name, 'dict.hdecode')
        d.write_dict(self.dict, False)

        # Same for the alignment dictionary named by the configuration.
        d = HTK_dictionary()
        d.read_dict(htk_config.adap_align_dict)
        self.adap_align_dict = os.path.join(name, 'dict.hvite')
        d.write_dict(self.adap_align_dict, True)

        self.language_model = language_model

        self.htk_config = htk_config

        self.adaptations = []
        self.adap_num_speaker_chars = None

        self.id = 0
        System.set_log_dir(os.path.basename(name))
Exemple #3
0
    def __init__(self, htk_config, name, model, scp, dictionary, language_model):
        """Set up the working directory, scp list(s) and dictionaries.

        Recreates the directory ``name`` (removing one that already exists)
        with ``xforms0`` and ``classes0`` inside it, copies the scp list(s)
        there with each entry joined to the directory of its source scp file,
        and writes ``dict.hdecode`` and ``dict.hvite`` from the given
        dictionaries.

        Args:
            htk_config: Configuration object providing ``num_speaker_chars``
                and ``adap_align_dict``. If ``num_speaker_chars`` is
                negative it is set to 3 on this object.
            name: Working directory; made absolute against the current
                working directory when it does not start with ``/``.
            model: Model path kept in ``self.model``; with a wildcard scp,
                the ``?`` run in it is replaced by each speaker id.
            scp: scp list path. When it contains ``?`` it is globbed, the
                longest ``?`` run is taken as the speaker id position, and a
                ``<speaker>_list.scp`` is written for every match.
            dictionary: Dictionary path loaded with ``HTK_dictionary``.
            language_model: Kept as ``self.language_model``.
        """
        if not name.startswith('/'):
            name = os.path.join(os.getcwd(), name)

        if htk_config.num_speaker_chars < 0:
            htk_config.num_speaker_chars = 3

        self.name = name
        if os.path.exists(name):
            shutil.rmtree(name, ignore_errors=True)
        os.mkdir(name)

        self.a_id = 0
        self.xforms_dir = os.path.join(name, 'xforms%d' % self.a_id)
        os.mkdir(self.xforms_dir)

        self.classes_dir = os.path.join(name, 'classes%d' % self.a_id)
        os.mkdir(self.classes_dir)

        self.model = model

        self.split_scp_models = []

        if '?' in scp:
            self.scp = None
            # Grow the run length until no longer run of '?' is present.
            num_scp_speaker_chars = 1
            while '?' * (num_scp_speaker_chars + 1) in scp:
                num_scp_speaker_chars += 1
            placeholder = '?' * num_scp_speaker_chars
            s_index = scp.find(placeholder)

            # Collect speaker ids up front, before writing anything.
            speakers = [s[s_index:s_index + num_scp_speaker_chars]
                        for s in glob.iglob(scp)]

            for s in speakers:
                source_scp = scp.replace(placeholder, s)
                # Use the resolved file's directory: with the pattern's own
                # directory, a '?' in the directory part would be copied
                # verbatim into every output path.
                source_dir = os.path.dirname(source_scp)
                real_scp = os.path.join(name, '%s_list.scp' % s)
                with open(real_scp, 'w') as scp_desc:
                    # `with` closes the source list once it has been read.
                    with open(source_scp) as scp_in:
                        for line in scp_in:
                            print(os.path.join(source_dir, line.strip()),
                                  file=scp_desc)
                self.split_scp_models.append(
                    (s, real_scp, model.replace(placeholder, s))
                )
        else:
            source_dir = os.path.dirname(scp)
            self.scp = os.path.join(name, 'list.scp')
            with open(self.scp, 'w') as scp_desc:
                with open(scp) as scp_in:
                    for line in scp_in:
                        print(os.path.join(source_dir, line.strip()),
                              file=scp_desc)

        # Decoding dictionary, re-written into the working directory.
        d = HTK_dictionary()
        d.read_dict(dictionary)

        self.dict = os.path.join(name, 'dict.hdecode')
        d.write_dict(self.dict, False)

        # Alignment dictionary taken from the configuration.
        d = HTK_dictionary()
        d.read_dict(htk_config.adap_align_dict)
        self.adap_align_dict = os.path.join(name, 'dict.hvite')
        d.write_dict(self.adap_align_dict, True)

        self.language_model = language_model

        self.htk_config = htk_config

        self.adaptations = []
        self.adap_num_speaker_chars = None

        self.id = 0
        System.set_log_dir(os.path.basename(name))