Exemple #1
0
def state_durations_from_trace(fname):
    """Collect the per-state frame counts of every phone in a trace file.

    The file *fname* is read with ``readlist``.  Lines containing ``'Name'``
    supply the phone labels and lines containing ``'(frames)'`` supply frame
    counts (the first integer on the line).  The first ``'(frames)'`` line is
    discarded -- presumably it is a whole-utterance entry; TODO confirm.

    Returns a numpy array built from one row of five frame counts per phone.

    Raises AssertionError if there are not exactly five frame counts for
    every phone.
    """
    states_per_phone = 5
    lines = readlist(fname)

    # Label is the text after the second '-' up to the following '+'.
    phones = [l.split('-')[2].split('+')[0] for l in lines if 'Name' in l]

    # Raw-string pattern: '\d' in a plain literal is an invalid escape sequence.
    frames = [int(re.findall(r'\d+', l)[0])
              for l in lines if '(frames)' in l][1:]

    # Multiplication instead of division so the check does not depend on
    # Python 2 integer-division semantics.
    assert len(frames) == states_per_phone * len(phones)

    outdata = [frames[start:start + states_per_phone]
               for start in range(0, len(frames), states_per_phone)]
    return numpy.array(outdata)
        
 def load_letternames(self):
     """Fill ``self.letternames`` from the file ``self.letter_fname``.

     Each line is stripped of surrounding spaces/newlines and split on its
     first run of whitespace into a letter and its pronunciation.  Lines
     that are empty after stripping are skipped.  A non-empty line with no
     whitespace separator still raises ValueError on unpacking.
     """
     self.letternames = {}
     for line in readlist(self.letter_fname):
         line = line.strip(' \n')
         if not line:
             ## a blank line cannot be unpacked into (letter, pron)
             continue
         ## raw string: '\s' in a plain literal is an invalid escape sequence
         letter, pron = re.split(r'\s+', line, maxsplit=1)
         self.letternames[letter] = pron
Exemple #3
0
def silence_frames_from_trace(fname):
    """Build a per-frame silence mask from a trace file.

    The file *fname* is read with ``readlist``.  Lines containing ``'Name'``
    supply the phone labels and lines containing ``'(frames)'`` supply frame
    counts (the first integer on the line); the first ``'(frames)'`` line is
    discarded.  Each phone owns five consecutive frame counts.

    Returns a 1-D integer numpy array with one element per frame, set to 1
    for frames belonging to a phone labelled ``'_END_'`` or ``'sil'`` and 0
    elsewhere.

    Raises AssertionError if there are not exactly five frame counts for
    every phone.
    """
    states_per_phone = 5
    silent_labels = ('_END_', 'sil')

    lines = readlist(fname)

    # Label is the text after the second '-' up to the following '+'.
    phones = [l.split('-')[2].split('+')[0] for l in lines if 'Name' in l]

    # Raw-string pattern: '\d' in a plain literal is an invalid escape sequence.
    frames = [int(re.findall(r'\d+', l)[0])
              for l in lines if '(frames)' in l][1:]

    # Multiplication instead of division so the check does not depend on
    # Python 2 integer-division semantics.
    assert len(frames) == states_per_phone * len(phones)

    mask = np.zeros(sum(frames), dtype=int)
    start = 0
    for state_index, length in enumerate(frames):
        end = start + length
        # States are grouped five per phone, in phone order.
        if phones[state_index // states_per_phone] in silent_labels:
            mask[start:end] = 1
        start = end

    return mask
Exemple #4
0
 def get_silent_feature_indices(self, question_file_name, silence_pattern):
     """Return the indices of questions that contain *silence_pattern*.

     The question file is read with ``readlist``; empty lines and lines
     containing ``'CQS'`` are dropped before numbering, so the returned
     indices refer to positions in the remaining list.  Each match is also
     printed.
     """
     ## single-argument print(...) behaves the same on Python 2 and 3
     print('get_silent_feature_indices')
     questions = [q for q in readlist(question_file_name)
                  if q != '' and 'CQS' not in q]
     indices = []
     for (i, question) in enumerate(questions):
         if silence_pattern in question:
             indices.append(i)
             print('silence question found:')
             print(question)
     return indices
    def load_lexicon(self):
        """Read the lexicon file ``self.lexicon_fname`` into ``self.entries``.

        Every line must consist of exactly three tab-separated fields: head
        word, comma-separated tags and pronunciation.  ``self.entries`` maps
        each head word to a list of ``(tag_list, pronunciation)`` tuples in
        file order.
        """
        assert os.path.isfile(self.lexicon_fname)
        records = readlist(self.lexicon_fname)
        self.entries = {}
        for record in records:
            headword, tag_field, pronunciation = record.split('\t')
            variant = (tag_field.split(','), pronunciation)
            self.entries.setdefault(headword, []).append(variant)
Exemple #6
0
    def load_stream_info(self):
        """Load stream names and dimensions from ``stream_info.txt``.

        The file lives in ``self.model_dir`` and must contain exactly four
        space-separated lines: input stream names, input dimensions, output
        stream names, output dimensions.  Sets ``self.instreams``,
        ``self.outstreams`` and the name -> dimension dicts ``self.indims``
        and ``self.outdims``.  The output dimensions must sum to
        ``self.outdim``.
        """
        info_path = os.path.join(self.model_dir, 'stream_info.txt')
        assert os.path.isfile(info_path)

        rows = [row.split(' ') for row in readlist(info_path)]
        assert len(rows) == 4
        self.instreams, in_sizes, self.outstreams, out_sizes = rows
        in_sizes = list(map(int, in_sizes))
        out_sizes = list(map(int, out_sizes))

        ## note that indims are not network input, but input to acoustic preprocessing of data!
        assert self.outdim == sum(out_sizes)
        self.indims = dict(zip(self.instreams, in_sizes))
        self.outdims = dict(zip(self.outstreams, out_sizes))
 def load_lexicon(self):
     """Read the lexicon file ``self.lexicon_fname``.

     Every line must consist of a head word and a pronunciation separated
     by a single tab.  ``self.entries`` maps each head word to its first
     pronunciation; later duplicates are ignored.  ``self.phone_inventory``
     lists, in order of first appearance, the space-separated symbols of
     the pronunciations that were kept.
     """
     ## assume one entry per head word -- take first if multiple
     assert os.path.isfile(self.lexicon_fname)
     records = readlist(self.lexicon_fname)
     self.entries = {}
     self.phone_inventory = []
     for record in records:
         headword, pronunciation = record.split('\t')
         if headword in self.entries:
             continue
         self.entries[headword] = pronunciation
         for symbol in pronunciation.split(' '):
             if symbol not in self.phone_inventory:
                 self.phone_inventory.append(symbol)
    def load_extra_lexicon(self, extra_lex):
        """Merge the entries of the lexicon file *extra_lex* into ``self.entries``.

        Lines starting with ``'#'`` and empty / whitespace-only lines are
        skipped.  Every other line must consist of three tab-separated
        fields: head word, comma-separated tags and pronunciation.  A
        pronunciation without a ``'|'`` is first passed through
        ``self.syllabify``.  Each ``(tag_list, pronunciation)`` tuple is
        appended to the list stored under its head word.

        ``self.entries`` must already exist.  Raises AssertionError if
        *extra_lex* is not a file.
        """
        assert os.path.isfile(extra_lex), 'not file: ' + extra_lex
        for item in readlist(extra_lex):
            ## raw string: '\A', '\s' and '\Z' are invalid escape sequences
            ## in a plain string literal
            if item.startswith('#') or re.match(r'\A\s*\Z', item):
                continue
            (head, tag, pron) = item.split('\t')
            tag = tag.split(',')
            if '|' not in pron:
                pron = self.syllabify(pron)
            self.entries.setdefault(head, []).append((tag, pron))
    def process_utterance(self, utt):
        """Extract acoustic features for *utt* and write them to its feature file.

        Nothing is done when *utt* has no "waveform" attribute.  Otherwise
        the acoustic stream info is recorded on *utt* first (so it is present
        even when existing features are reused), and extraction is skipped if
        external data of type ``self.output_filetype`` already exists.

        Extraction shells out to sox and to the analysis / conversion tools
        under ``self.tool``, computes log F0, appends deltas with
        ``window.pl``, stacks the mgc, lf0 and bap streams into one file,
        adds an HTK header and finally deletes intermediate files.  If any
        external command exits non-zero, a message is printed and the method
        returns early, leaving whatever files were already produced.
        """

        def run(command, task):
            """Run *command* in a shell; on failure report *task* and return False."""
            if os.system(command) != 0:
                print(task + ' failed on utterance ' + utt.get("utterance_name"))
                return False
            return True

        ## If there is no waveform attached to the utt, don't do anything:
        if not utt.has_attribute("waveform"):
            return

        ## Add some data to the utt structure recording the structure of the
        ## associated acoustic features we've produced. Do this first, in case
        ## we use existing features.
        self.stream_sizes[1] = '1'  ## otherwise '1 1 1' for F0    TODO: fix this nicely!
        utt.add_acoustic_stream_info(self.feats, self.stream_sizes)

        ## If a feature file already exists, skip:
        if utt.has_external_data(self.output_filetype):
            ##  TODO: check description against existing feats?
            return

        ## else extract features
        infile = utt.get("waveform")
        outfile = utt.get_filename(self.output_filetype)

        ## strip the '.<output_filetype>' suffix to get the common file stem:
        assert outfile.endswith('.' + self.output_filetype)
        chars_to_strip = len(self.output_filetype) + 1
        outstem = outfile[:-chars_to_strip]

        rate = self.rate
        alpha = self.alpha
        order = self.order
        fftl = self.fftl
        apsize = self.apsize
        frameshift_ms = self.frameshift_ms

        script_dir = self.voice_resources.path[c.SCRIPT]

        ## NOTE(review): file names are interpolated unquoted into shell
        ## command strings below, so paths containing spaces or shell
        ## metacharacters will not work.

        ## remove wave header, downsample etc. with sox:
        comm = "sox -t wav " + infile
        comm += " -c 1 -e signed-integer "
        comm += " -r %s" % (rate)
        comm += " -b 16 "
        comm += " " + outstem + ".wav"
        comm += " dither"  ## added for hi and rj data blizz 2014
        if not run(comm, 'sox'):
            return

        ## analysis into f0 / spectrum / aperiodicity (this command is very slow):
        comm = "%s/analysis %s.wav %s.f0.double %s.sp.double %s.bap.double > %s.log" % (
            self.tool, outstem, outstem, outstem, outstem, outstem)
        if not run(comm, 'world analysis'):
            return

        if self.resynthesise_training_data:
            ## resynthesis to test
            comm = "%s/synth %s %s %s.f0.double %s.sp.double %s.bap.double %s.resyn.wav > %s.log" % (
                self.tool, fftl, rate, outstem, outstem, outstem, outstem,
                outstem)
            if not run(comm, 'world synthesis'):
                return

        ## spectrum -> mel cepstra (this command is very slow):
        comm = "%s/x2x +df %s.sp.double | %s/sopr -R -m 32768.0 | %s/mcep -a %s -m %s -l %s -j 0 -f 0.0 -q 3 > %s.mgc" % (
            self.tool, outstem, self.tool, self.tool, alpha, order, fftl,
            outstem)
        ## -e 1.0E-8
        if not run(comm, 'conversion of world spectrum to mel cepstra'):
            return

        for stream in ['bap']:
            comm = "%s/x2x +df %s.%s.double > %s.%s" % (
                self.tool, outstem, stream, outstem, stream)
            if not run(comm, 'double -> float conversion (stream: ' + stream + ')'):
                return

        for stream in ['f0']:
            comm = "%s/x2x +da %s.%s.double > %s.%s.txt" % (
                self.tool, outstem, stream, outstem, stream)
            if not run(comm, 'double -> ascii conversion (stream: ' + stream + ')'):
                return

        ## F0 conversion: log of every non-zero value, zero values are
        ## written as the literal '-1.0E10'
        f0 = [float(val) for val in readlist(outstem + '.f0.txt')]
        log_f0 = ['-1.0E10' if val == 0.0 else math.log(val) for val in f0]
        writelist(log_f0, outstem + '.f0.log')

        comm = "%s/x2x +af %s.f0.log > %s.lf0" % (self.tool, outstem, outstem)
        if not run(comm, 'writing log f0'):
            return

        ## add mcep/ap/f0 deltas (this command is very slow):
        for (stream, dimen) in [('mgc', order + 1), ('bap', apsize),
                                ('lf0', 1)]:
            comm = "perl %s/window.pl %s " % (script_dir, dimen)
            comm += "%s.%s %s > %s.%s.delta" % (outstem, stream, ' '.join(
                self.winfiles), outstem, stream)
            if not run(comm, 'delta (' + stream + ') extraction'):
                return

        ## combine streams in the order mgc, lf0, bap:
        n_windows = len(self.winfiles)
        ap = get_speech(outstem + '.bap.delta', apsize * n_windows)
        mgc = get_speech(outstem + '.mgc.delta', (order + 1) * n_windows)
        lf0 = get_speech(outstem + '.lf0.delta', 1 * n_windows)
        combined = numpy.hstack([mgc, lf0, ap])
        put_speech(combined, outfile)

        ## add header
        floats_per_frame = (order + 2 + apsize) * n_windows  ## +2 for energy and F0
        add_htk_header(outfile, floats_per_frame, frameshift_ms)

        ## tidy: delete every '<outstem>.*' file whose extension is not kept
        self.extensions_to_keep = ['.' + self.output_filetype,
                                   '.f0.txt',  ## TODO: make configuable?
                                   '.resyn.wav',
                                   '.mgc', '.bap', '.lf0']

        keepfiles = [outstem + ending for ending in self.extensions_to_keep]

        for junk in glob.glob(outstem + '.*'):
            if junk not in keepfiles:
                os.remove(junk)
Exemple #10
0
def main_work():
    """Synthesise speech for every text file named on the command line.

    Parses the command-line options, builds a ``Voice`` for the requested
    speaker / language / configuration, and calls ``voice.synth_utterance``
    once per input file.  Each wave file is written to the ``-o`` directory,
    or to ``<voice location>/output/wav`` when ``-o`` is not given.  With
    ``-u`` a ``.utt`` file per input is also requested, in a directory that
    is created if it does not exist.  With ``-lab`` the ``'dnn_lab'`` output
    extension is requested as well.

    NOTE(review): the help text for ``files`` says input is read from stdin
    by default, but when no files are given this function synthesises
    nothing.
    """

    # ROOT is the parent of the directory that contains this file.
    this_dir = os.path.dirname(inspect.getfile(inspect.currentframe()))
    ROOT = os.path.split(os.path.realpath(os.path.abspath(this_dir)))[0] + '/'

    dirs = {
        'ROOT': ROOT,
        'CONFIG': ROOT + "configs/",
        'VOICES': ROOT + "voices/",
        'TRAIN': ROOT + "train/",
        'RULES': ROOT + "rules/",
        'CORPUS': ROOT + "corpus/",
        # NOTE(review): ROOT already ends with '/', so this path contains '//'.
        'BIN': ROOT + "/tools/bin/"
    }

    # ======== Get stuff from command line ==========

    a = ArgumentParser()
    a.add_argument('-s', dest='speaker', required=True,
                   help= "the name of the speaker: <ROOT>/corpus/<LANG>/<SPEAKER>")
    a.add_argument('-l', dest='lang', required=True,
                   help= "the language of the speaker: <ROOT>/corpus/<LANG>")
    a.add_argument('-o', dest='output', required=False, default=False,
                   help= "output audio here")
    a.add_argument('-t', dest='stage', required=False, default="runtime",
                    help=""" defines the current usage stage 
                            (definitions of stages should by found in <config>/recipe.cfg""")
    a.add_argument('-u', dest='output_utt', required=False, default=False,
                   help= "output utt files here")
    a.add_argument('-play', dest='play', action="store_true", required=False, default=False,
                   help=" play audio after synthesis")
    a.add_argument('-lab', dest='make_label', action="store_true", default=False,
                   help= "make label file as well as wave in output location")
    a.add_argument('config',
                   help="""configuration to use: naive, semi-naive, gold, 
                                    as defined in <ROOT>/recipes/<config> -directory"""
                   )
    a.add_argument('files',
                   nargs='*',
                   help="text files to speak, reading from stdin by default")
    opts = a.parse_args()

    voice_location = os.path.join(dirs['VOICES'], opts.lang, opts.speaker,
                                  opts.config)

    ## Make Voice object to contain voice elements trained on this corpus:
    voice = Voice(opts.speaker, opts.lang, opts.config, opts.stage, dirs)

    if opts.output:
        output_dir = opts.output
    else:
        output_dir = os.path.join(voice_location, 'output', 'wav')

    if opts.output_utt and not os.path.isdir(opts.output_utt):
        os.makedirs(opts.output_utt)

    output_extensions = []
    if opts.make_label:
        output_extensions.append('dnn_lab')

    for filename in opts.files:
        base = get_basename(filename)
        text = ' '.join(readlist(filename))
        try:
            print(text)
        except UnicodeError:
            ## text cannot be encoded for the output stream; keep going
            print('  ')
        print(base)

        ## output_uttfile is only passed when utt output was requested, so
        ## synth_utterance otherwise sees its own default for that argument.
        synth_args = {
            'output_wavefile': os.path.join(output_dir, base + '.wav'),
            'output_extensions': output_extensions,
        }
        if opts.output_utt:
            synth_args['output_uttfile'] = os.path.join(opts.output_utt,
                                                        base + '.utt')
        voice.synth_utterance(text, **synth_args)
 def load_onsets(self):
     """Load the onsets listed in ``self.onsets_fname`` into ``self.onsets``.

     Each line is split on single spaces into a tuple; ``self.onsets`` maps
     every such tuple to itself.
     """
     onset_tuples = [tuple(entry.split(' '))
                     for entry in readlist(self.onsets_fname)]
     self.onsets = {onset: onset for onset in onset_tuples}