コード例 #1
0
#!/usr/bin/env python2.6

from optparse import OptionParser
import sys

from htk2.units import HTK_transcription

# Command line: two positional arguments (the word-level MLF to read and the
# TRN file to write) plus the integer option --num-speaker-chars (default 3),
# which is handed unchanged to HTK_transcription.write_trn.
usage = "usage: %prog [options] wordsmlf referencetrn"
parser = OptionParser(usage=usage)
parser.add_option('--num-speaker-chars', dest='numspeakerchars', type='int',
                  default=3)

options, args = parser.parse_args()

if len(args) < 2:
    # sys.exit with a string prints it to stderr and exits with status 1.
    sys.exit("Need at least two arguments")

# Any positional arguments after the first two are ignored.
mlf, trn = args[:2]

# Read the MLF as word-level transcriptions and write them out as TRN.
tr = HTK_transcription()
tr.read_mlf(mlf, HTK_transcription.WORD)
tr.write_trn(trn, options.numspeakerchars)

コード例 #2
0
#!/usr/bin/env python2.6

from optparse import OptionParser
import sys

from htk2.units import HTK_transcription

# Option parsing: expects "wordsmlf referencetrn" as positional arguments and
# accepts --num-speaker-chars (int, default 3), forwarded to write_trn below.
usage = "usage: %prog [options] wordsmlf referencetrn"
parser = OptionParser(usage=usage)
parser.add_option('--num-speaker-chars',
                  dest='numspeakerchars',
                  type='int',
                  default=3)

options, args = parser.parse_args()

if len(args) < 2:
    # A string argument makes sys.exit print it to stderr and exit with 1.
    sys.exit("Need at least two arguments")

# Only the first two positional arguments are used; extras are ignored.
mlf, trn = args[:2]

# Convert: load the word-level MLF, then dump it in TRN format.
tr = HTK_transcription()
tr.read_mlf(mlf, HTK_transcription.WORD)
tr.write_trn(trn, options.numspeakerchars)
コード例 #3
0
    def initialize_new(self, scp_list, word_mlf, dict, remove_previous=False):
        """Set up the training files for a fresh model.

        Writes the merged training dictionary, the phone list
        (``<model name id>.hmmlist``), the training scp file and the
        training word MLF, then calls ``expand_word_transcription()``.

        Args:
            scp_list: Comma-separated string of scp file paths. Entries in
                an scp file that do not start with '/' are resolved
                relative to the directory of that scp file.
            word_mlf: Comma-separated string of MLF paths, each read as a
                word-level transcription.
            dict: Path of a dictionary file, or an iterable of such paths;
                all of them are read into one dictionary.
            remove_previous: When true, files in the log directory, the
                existing training-files directory and files matching
                ``<model_dir>/<name>.*`` are deleted first. When false, the
                presence of either of the latter two raises instead.

        Raises:
            ExistingFilesException: Previous files exist and
                ``remove_previous`` is false.
            TypeError: ``dict`` is neither a string nor an iterable of
                strings.

        NOTE(review): an scp entry with no transcription in ``word_mlf``
        is not handled here; the lookup in ``trans.transcriptions`` will
        presumably raise KeyError - confirm against HTK_transcription.
        """
        System.set_log_dir(self.name)
        model_pattern = self.model_dir + '/' + self.name + '.*'

        if remove_previous:
            for old_log in glob.iglob(System.get_log_dir() + '/*'):
                os.remove(old_log)
        elif os.path.exists(self.train_files_dir) or glob.glob(model_pattern):
            raise ExistingFilesException

        # Start from a clean slate: no training dir, no stale model files.
        if os.path.exists(self.train_files_dir):
            shutil.rmtree(self.train_files_dir)
        for old_model in glob.iglob(model_pattern):
            os.remove(old_model)
        os.mkdir(self.train_files_dir)

        # Merge the dictionary file(s) into a single training dictionary.
        dic = HTK_dictionary()
        if isinstance(dict, basestring):
            dic.read_dict(dict)
        elif all(isinstance(d, basestring) for d in dict):
            for d in dict:
                dic.read_dict(d)
        else:
            raise TypeError(
                "dict must be a path or an iterable of paths")
        dic.write_dict(self.training_dict)

        self.phones = dic.get_phones()

        # Load every word-level MLF into one transcription object.
        trans = HTK_transcription()
        for mlf_path in word_mlf.strip().split(','):
            trans.read_mlf(mlf_path, HTK_transcription.WORD)

        # Set before _get_model_name_id() is called, as in the original
        # statement order.
        self.id = 1

        phones_list = self._get_model_name_id() + '.hmmlist'
        with open(phones_list, 'w') as phones_desc:
            for p in self.phones:
                print(p, file=phones_desc)

        # Collects the transcriptions of the files that are kept.
        real_trans = HTK_transcription()
        real_trans.transcriptions[real_trans.WORD] = {}

        with open(self.training_scp, 'w') as scp_desc:
            for scp in scp_list.strip().split(','):
                # Use a context manager so each scp file is closed after
                # reading instead of being left to the garbage collector.
                with open(scp) as scp_in:
                    for line in scp_in:
                        path = line.strip()
                        utt_id = os.path.splitext(
                            os.path.basename(path))[0]
                        if not line.startswith('/'):
                            path = os.path.join(
                                os.path.dirname(scp), path).strip()

                        words = trans.transcriptions[
                            HTK_transcription.WORD][utt_id]
                        for word in words:
                            if not dic.word_in_dict(word):
                                print(
                                    "%s skipped, because has missing word %s"
                                    % (path, word))
                                break
                        else:
                            # Every word is in the dictionary: keep the file.
                            print(path, file=scp_desc)
                            real_trans.transcriptions[real_trans.WORD][
                                utt_id] = words

        real_trans.write_mlf(self.training_word_mlf,
                             target=HTK_transcription.WORD)
        self.expand_word_transcription()
コード例 #4
0
    # `parts` is built outside this excerpt; its first element is used as
    # the key of this entry.
    sp = parts[0]
    # Candidate neighbours are all elements from the third one onwards;
    # parts[1] is not used here.
    neighbors = [n for n in parts[2:]]

    # Remove the first occurrence of the entry's own key from its candidates.
    if sp in neighbors:
        neighbors.remove(sp)

    # Store the leading options.num_neighbours candidates, in listed order.
    neighbour_dict[sp] = neighbors[:options.num_neighbours]

# Group the paths listed in t_scp by the first three characters of their
# basename (presumably a speaker id - confirm against the data layout).
transform_files = {}
with open(t_scp) as t_scp_desc:  # closed as soon as the list has been read
    for line in t_scp_desc:
        line = line.strip()
        sp = basename(line)[:3]
        transform_files.setdefault(sp, []).append(line)

# Load the word-level transcriptions from t_mlf.
mlf = HTK_transcription()
mlf.read_mlf(t_mlf, target=HTK_transcription.WORD)

#trans_mlf = HTK_transcription()

with open(transform_scp, 'w') as transform_desc:
    # For each entry, pool the transform files of all its neighbours and put
    # them in random order.
    for sp, neighbors in neighbour_dict.items():
        t_files = []
        for n in neighbors:
            t_files += transform_files[n]
        shuffle(t_files)

        #        if options.num_adaptation_files > 0:
        #            t_files = t_files[:options.num_adaptation_files]
コード例 #5
0
    # `parts` is built outside this excerpt; its first element is used as
    # the key of this entry.
    sp = parts[0]
    # Candidate neighbours are all elements from the third one onwards;
    # parts[1] is not used here.
    neighbors = [n for n in parts[2:]]

    # Remove the first occurrence of the entry's own key from its candidates.
    if sp in neighbors:
        neighbors.remove(sp)

    # Store the leading options.num_neighbours candidates, in listed order.
    neighbour_dict[sp] = neighbors[:options.num_neighbours]

# Group the paths listed in t_scp by the first three characters of their
# basename (presumably a speaker id - confirm against the data layout).
transform_files = {}
with open(t_scp) as t_scp_desc:  # closed as soon as the list has been read
    for line in t_scp_desc:
        line = line.strip()
        sp = basename(line)[:3]
        transform_files.setdefault(sp, []).append(line)

# Load the word-level transcriptions from t_mlf.
mlf = HTK_transcription()
mlf.read_mlf(t_mlf, target=HTK_transcription.WORD)


#trans_mlf = HTK_transcription()

with open(transform_scp, 'w') as transform_desc:
    # For each entry, pool the transform files of all its neighbours and put
    # them in random order.
    for sp, neighbors in neighbour_dict.items():
        t_files = []
        for n in neighbors:
            t_files += transform_files[n]
        shuffle(t_files)

#        if options.num_adaptation_files > 0:
#            t_files = t_files[:options.num_adaptation_files]