Example #1
0
def frontend():
    """Assemble a front-end-only LwaziVoice (no synthesizer) and pickle it."""
    from ttslab.defaultvoice import LwaziVoice
    resources = {"phoneset": ttslab.fromfile(PHONESET_FILE),
                 "g2p": ttslab.fromfile(G2P_FILE),
                 "pronundict": ttslab.fromfile(PRONUNDICT_FILE),
                 "pronunaddendum": ttslab.fromfile(PRONUNADDENDUM_FILE)}
    ttslab.tofile(LwaziVoice(**resources), "frontend.voice.pickle")
Example #2
0
def us():
    """Assemble a LwaziUSVoice with a unit-selection synthesizer and pickle it."""
    from ttslab.defaultvoice import LwaziUSVoice
    from ttslab.synthesizer_us import SynthesizerUS
    load = ttslab.fromfile
    voice = LwaziUSVoice(phoneset=load(PHONESET_FILE),
                         g2p=load(G2P_FILE),
                         pronundict=load(PRONUNDICT_FILE),
                         pronunaddendum=load(PRONUNADDENDUM_FILE),
                         synthesizer=SynthesizerUS(voice=None, unitcatalogue=load(USCATALOGUE_FILE)))
    ttslab.tofile(voice, "us.voice.pickle")
Example #3
0
def htsfrontend():
    """Assemble a LwaziHTSVoice with a model-less HTSME synthesizer and pickle it."""
    from ttslab.defaultvoice import LwaziHTSVoice
    from ttslab.synthesizer_htsme import SynthesizerHTSME
    load = ttslab.fromfile
    # No models loaded yet: this is the front-end-only variant of the HTS voice.
    voice = LwaziHTSVoice(phoneset=load(PHONESET_FILE),
                          g2p=load(G2P_FILE),
                          pronundict=load(PRONUNDICT_FILE),
                          pronunaddendum=load(PRONUNADDENDUM_FILE),
                          synthesizer=SynthesizerHTSME(voice=None, models_dir=None))
    ttslab.tofile(voice, "frontend.hts.voice.pickle")
def hts():
    """Assemble a LwaziHTSVoice backed by trained HTS models and pickle it."""
    from ttslab.defaultvoice import LwaziHTSVoice
    from ttslab.voices.yoruba_default import SynthesizerHTSME_Tone_NoTone
    load = ttslab.fromfile
    modelsdir = os.path.join(os.getcwd(), HTSMODELS_DIR)
    voice = LwaziHTSVoice(phoneset=load(PHONESET_FILE),
                          g2p=load(G2P_FILE),
                          pronundict=load(PRONUNDICT_FILE),
                          pronunaddendum=load(PRONUNADDENDUM_FILE),
                          synthesizer=SynthesizerHTSME_Tone_NoTone(voice=None, models_dir=modelsdir))
    ttslab.tofile(voice, "hts.voice.pickle")
Example #5
0
def uttdtwdistcalc(args):
    """Synthesize an utterance's text and attach a DTW distance track.

    args is a (voicefilename, uttfilename) pair (packed for use with map).
    The updated utterance is saved into UTTDIR2.
    """
    voicefname, uttfname = args
    voice = ttslab.fromfile(voicefname)
    utt = ttslab.fromfile(uttfname)
    print(utt["file_id"], end=" ")
    resynth = voice.synthesize(utt["text"], "text-to-wave")
    track = utt.utt_distance(resynth)
    track.name = utt["file_id"]
    utt["dtwdists"] = {"utt": resynth, "track": track}
    ttslab.tofile(utt, os.path.join(UTTDIR2, utt["file_id"] + ".utt.pickle"))
Example #6
0
def wordus():
    """Assemble a word-unit-selection WordUSVoice and pickle it."""
    from ttslab.defaultvoice import WordUSVoice
    from ttslab.synthesizer_us import SynthesizerUSWordUnits
    load = ttslab.fromfile
    synth = SynthesizerUSWordUnits(voice=None, unitcatalogue=load(USCATALOGUE_FILE))
    voice = WordUSVoice(phoneset=load(PHONESET_FILE),
                        g2p=load(G2P_FILE),
                        pronundict=load(PRONUNDICT_FILE),
                        pronunaddendum=load(PRONUNADDENDUM_FILE),
                        synthesizer=synth,
                        silword="PAUSE")
    ttslab.tofile(voice, "wordus.voice.pickle")
Example #7
0
def uttlindistcalc(args):
    """Resynthesize an aligned utterance and attach a linear distance track.

    args is a (voicefilename, uttfilename) pair (packed for use with map).
    The updated utterance is saved into UTTDIR2.
    """
    voicefname, uttfname = args
    voice = ttslab.fromfile(voicefname)
    utt = ttslab.fromfile(uttfname)
    print(utt["file_id"], end=" ")
    resynth = copy.deepcopy(utt)
    resynth.voice = voice
    resynth = voice.resynthesize(resynth, processname="utt-to-wave", htsparms={"-vp": True})
    track = utt.utt_distance(resynth, method="linear")
    track.name = utt["file_id"]
    utt["lindists"] = {"utt": resynth, "track": track}
    ttslab.tofile(utt, os.path.join(UTTDIR2, utt["file_id"] + ".utt.pickle"))
Example #8
0
 def prev(self):
     """Save current edits, then step back to the previous worklist item."""
     self.save_data()
     if self.current_index <= 0:
         return
     self.current_index -= 1
     entry = self.worklist[self.current_index]
     self.current_wordindex = entry[1]
     self.current_utt = ttslab.fromfile(entry[0])
     self.current_utt.fill_startendtimes()
def train_standard(parms):
    """Populate an HTS training tree from a template and run training.

    parms keys read here: "workingdir", "template", "questionsfile",
    "uttquestionsfile", "utts", "pitchmin", "pitchmax", "voice".
    Side effects: extracts the template archive into the working dir,
    copies utterances/audio into it, changes the process working
    directory, and shells out to configure/make.
    """

    #setup dirs...
    os.makedirs(parms["workingdir"])
    t = tarfile.open(parms["template"], "r:*")
    t.extractall(parms["workingdir"])

    #SETUP FILES
    shutil.copy(parms["questionsfile"], os.path.join(parms["workingdir"], QUESTIONS_SUBDIR))
    shutil.copy(parms["uttquestionsfile"], os.path.join(parms["workingdir"], QUESTIONS_SUBDIR))
    print(os.getcwd())
    for fn in sorted(glob(os.path.join(parms["utts"], "*." + UTT_EXT))):
        print("PROCESSING: %s" % (fn))
        #copy utt with DATASET_SPEAKER_bname to HTS tree:
        shutil.copy(fn, os.path.join(parms["workingdir"], UTT_SUBDIR, "_".join([DATASET, SPEAKER, os.path.basename(fn)])))
        #get raw audio files from utts:
        u = ttslab.fromfile(fn)
        waveform = u["waveform"]
        # Write the audio twice: once into the raw-sample tree, once into
        # the wav tree (same basename, different extension/subdir).
        waveform.write(os.path.join(parms["workingdir"],
                                    RAW_SUBDIR,
                                    "_".join([DATASET, SPEAKER, os.path.basename(fn)])[:-len(UTT_EXT)] + RAW_EXT))
        waveform.write(os.path.join(parms["workingdir"],
                                    WAV_SUBDIR,
                                    "_".join([DATASET, SPEAKER, os.path.basename(fn)])[:-len(UTT_EXT)] + WAV_EXT))
        
    #TRAIN...
    os.chdir(parms["workingdir"])
    # NOTE(review): CONFIGURE/MAKE are run via os.system with values
    # interpolated into a shell string — safe only for trusted parms.
    os.system(CONFIGURE % (WITH_SPTK_SEARCH_PATH,
                           WITH_HTS_SEARCH_PATH,
                           WITH_HTS_ENGINE_SEARCH_PATH,
                           SPEAKER, DATASET, parms["pitchmin"], parms["pitchmax"],
                           parms["voice"]))
    os.system(MAKE)
Example #10
0
def main():
    """CLI entry point: load a voice file and run the chosen alignment step."""
    handlers = {"auto": auto,
                "to_textgrid": to_textgrid,
                "from_textgrid": from_textgrid,
                "alignments_from_textgrid": alignments_from_textgrid}
    try:
        try:
            voicefile = sys.argv[1]
            proc = sys.argv[2]
        except IndexError:
            raise CLIException

        voice = ttslab.fromfile(voicefile)

        handler = handlers.get(proc)
        if handler is None:
            raise CLIException
        handler(voice)
    except CLIException:
        print(
            "USAGE: ttslab_align.py [VOICEFILE] [auto | to_textgrid | from_textgrid | alignments_from_textgrid]"
        )
def main():
    """CLI entry point: build word-unit features/catalogue for a voice.

    Usage: ttslab_make_wordunits.py VOICEFILE FEATSCONF
           [auto | make_features | make_catalogue]

    Fixed: the original body was indented with a mix of 5/8/9 spaces;
    normalised to a consistent 4-space indent (behavior unchanged).
    """
    try:
        voicefile = sys.argv[1]
        featconfpath = sys.argv[2]
        switch = sys.argv[3]
    except IndexError:
        print("USAGE: ttslab_make_wordunits.py VOICEFILE FEATSCONF [auto | make_features | make_catalogue]")
        sys.exit()

    voice = ttslab.fromfile(voicefile)
    with open(featconfpath) as conffh:
        featconfig = ConfigParser()
        featconfig.readfp(conffh)  # NOTE(review): readfp is deprecated on Py3 (read_file) — confirm target version
    try:
        if switch == "auto":
            auto(featconfig, voice)
        elif switch == "make_features":
            make_features(featconfig)
        elif switch == "make_catalogue":
            make_catalogue(voice)
        else:
            raise CLIException
    except CLIException:
        print("USAGE: ttslab_make_wordunits.py VOICEFILE FEATSCONF [auto | make_features | make_catalogue]")
Example #12
0
def main():
    """CLI entry point: run word-unit feature/catalogue building steps."""
    usage = ("USAGE: ttslab_make_wordunits.py VOICEFILE FEATSCONF "
             "[auto | make_features | make_catalogue]")
    if len(sys.argv) < 4:
        print(usage)
        sys.exit()
    voicefile, featconfpath, switch = sys.argv[1:4]

    voice = ttslab.fromfile(voicefile)
    with open(featconfpath) as conffh:
        featconfig = ConfigParser()
        featconfig.readfp(conffh)
    try:
        if switch == "auto":
            auto(featconfig, voice)
        elif switch == "make_features":
            make_features(featconfig)
        elif switch == "make_catalogue":
            make_catalogue(voice)
        else:
            raise CLIException
    except CLIException:
        print(usage)
Example #13
0
 def prev(self):
     """Save current edits, then move back one item in the worklist."""
     self.save_data()
     if self.current_index <= 0:
         return
     self.current_index -= 1
     uttpath, wordindex = self.worklist[self.current_index][:2]
     self.current_wordindex = wordindex
     self.current_utt = ttslab.fromfile(uttpath)
     self.current_utt.fill_startendtimes()
def add_feats_to_utt(args):
    """Attach per-unit acoustic features (LPC coefs, residuals, join
    coefficients) to an utterance's Unit relation and return it.

    args: a (utt, lpc_dir, joincoef_dir, f0_dir) tuple — packed as one
    argument so the function can be used with map()-style parallelism.
    """
    u, lpc_dir, joincoef_dir, f0_dir = args

    file_id = u["file_id"]
    print("Processing:", file_id)
    u.fill_startendtimes()
    # Units mirror Words one-to-one here; copy the word times onto units.
    for unit, word in zip(u.gr("Unit"), u.gr("Word")):
        assert unit["name"] == word["name"]
        unit["start"] = word["start"]
        unit["end"] = word["end"]

    # Load the per-file acoustic tracks produced by earlier pipeline steps.
    lpctrack = Track()
    lpctrack.load_track(".".join([os.path.join(lpc_dir, file_id), LPC_EXT]))
    restrack = Track()
    restrack.load_wave(".".join([os.path.join(lpc_dir, file_id), RES_EXT]))
    jointrack = ttslab.fromfile(".".join([os.path.join(joincoef_dir, file_id), JOIN_EXT]))
    f0track = Track()
    f0track.load_track(".".join([os.path.join(f0_dir, file_id), F0_EXT]))

    #get boundarytimes:
    # One boundary per unit edge: first unit contributes its start too,
    # so len(boundarytimes) == len(units) + 1.
    boundarytimes = []
    for i, unit in enumerate(u.gr("Unit")):
        if i == 0:
            boundarytimes.append(unit["start"])
        boundarytimes.append(unit["end"])

    #convert boundtimes into sample ranges:
    lpcsampleranges = []
    f0sampleranges = []
    joinsamples = []
    for bound in boundarytimes:
        lpcsampleranges.append(lpctrack.index_at(bound))
        f0sampleranges.append(f0track.index_at(bound))
        joinsamples.append(jointrack.values[jointrack.index_at(bound)])

    #get pitchperiods at lpc indices
    # Prepend 0.0 so diff() yields a period for the very first frame.
    lpctimes = np.concatenate(([0.0], lpctrack.times))
    pitchperiod = np.diff(lpctimes)

    units = u.get_relation("Unit").as_list()
    
    assert len(units) == len(lpcsampleranges) - 1
    # Walk consecutive boundary pairs alongside the units they delimit.
    # NOTE(review): fti0/fti1 (F0 sample ranges) are unpacked but never
    # used in this loop body — confirm whether that is intentional.
    for jc0, jc1, lti0, lti1, fti0, fti1, i in zip(joinsamples[:-1], joinsamples[1:],
                                                   lpcsampleranges[:-1], lpcsampleranges[1:],
                                                   f0sampleranges[:-1], f0sampleranges[1:],
                                                   units):
#        print(i["name"], "lpctrack[%s:%s]" % (lti0, lti1), "len(lpctrack)=%s" % len(lpctrack))
        i["left-joincoef"] = jc0
        i["right-joincoef"] = jc1
        i["lpc-coefs"] = lpctrack.slice(lti0, lti1, copy=True) #like python indexing/slicing
        if lti0 == 0:
            i["lpc-coefs"].starttime = 0.0
        else:
            i["lpc-coefs"].starttime = lpctrack.times[lti0 - 1]
        i["lpc-coefs"].zero_starttime()
        #For windowfactor=2 (save only samples and assume 16kHz)
        # Residual slice is widened by one pitch period on both sides.
        i["residuals"] = restrack.slice(restrack.index_at(lpctrack.times[lti0] - pitchperiod[lti0]),
                                        restrack.index_at(lpctrack.times[lti1] + pitchperiod[lti0])).values
    return u
Example #15
0
 def __init__(self, worklist, phmap):
     """Load the first worklist utterance and seed annotation state."""
     self.phmap = phmap
     self.worklist = worklist
     self.current_index = 0
     first = self.worklist[0]
     self.current_wordindex = first[1]
     self.current_utt = ttslab.fromfile(first[0])
     self.current_utt.fill_startendtimes()
     utt = self.current_utt
     self.transcriptions = {first[0]: utt["text"]}
     self.comments = {first[0]: ""}
     self.pronuns = {first[0]: [" ".join(getpronun(w, self.phmap)) for w in utt.gr("SylStructure")]}
Example #16
0
 def __init__(self, worklist, voice):
     """Load the first worklist utterance and seed annotation state."""
     self.voice = voice
     self.worklist = worklist
     self.current_index = 0
     first = self.worklist[0]
     self.current_wordindex = first[1]
     self.current_utt = ttslab.fromfile(first[0])
     self.current_utt.fill_startendtimes()
     utt = self.current_utt
     self.transcriptions = {first[0]: utt["inputtext"]}
     self.comments = {first[0]: ""}
     self.pronuns = {first[0]: [" ".join(getpronun(w, self.voice)) for w in utt.gr("SylStructure")]}
def make_units(voice, utt_dir):
    """Apply the synthesizer's 'targetunits' step to every pickled
    utterance in utt_dir and return the processed utterances.
    """
    print("MAKING UNITS..")
    pattern = os.path.join(utt_dir, "*." + UTT_EXT)
    processed = []
    for uttfilename in sorted(glob(pattern)):
        print(uttfilename)
        utt = ttslab.fromfile(uttfilename)
        #DEMITASSE voice needs resynth method..
        processed.append(voice.synthesizer(utt, "targetunits"))
    return processed
def train_standard(parms):
    """Populate an HTS training tree from a template, run training, and
    copy the resulting filter files for the HTSME engine.

    parms keys read here: "workingdir", "template", "questionsfile",
    "uttquestionsfile", "utts", "pitchmin", "pitchmax", "voice".
    Side effects: extracts the template archive, copies utterances and
    audio into the working dir, changes the process working directory
    and shells out to configure/make.
    """

    #setup dirs...
    os.makedirs(parms["workingdir"])
    t = tarfile.open(parms["template"], "r:*")
    t.extractall(parms["workingdir"])

    #SETUP FILES
    shutil.copy(parms["questionsfile"],
                os.path.join(parms["workingdir"], QUESTIONS_SUBDIR))
    shutil.copy(parms["uttquestionsfile"],
                os.path.join(parms["workingdir"], QUESTIONS_SUBDIR))
    print(os.getcwd())
    for fn in sorted(glob(os.path.join(parms["utts"], "*." + UTT_EXT))):
        print("PROCESSING: %s" % (fn))
        #copy utt with DATASET_SPEAKER_bname to HTS tree:
        shutil.copy(
            fn,
            os.path.join(parms["workingdir"], UTT_SUBDIR,
                         "_".join([DATASET, SPEAKER,
                                   os.path.basename(fn)])))
        #get raw audio files from utts:
        u = ttslab.fromfile(fn)
        waveform = u["waveform"]
        # Write the audio twice: once into the raw-sample tree, once into
        # the wav tree (same basename, different extension/subdir).
        waveform.write(
            os.path.join(
                parms["workingdir"], RAW_SUBDIR,
                "_".join([DATASET, SPEAKER,
                          os.path.basename(fn)])[:-len(UTT_EXT)] + RAW_EXT))
        waveform.write(
            os.path.join(
                parms["workingdir"], WAV_SUBDIR,
                "_".join([DATASET, SPEAKER,
                          os.path.basename(fn)])[:-len(UTT_EXT)] + WAV_EXT))

    #TRAIN...
    os.chdir(parms["workingdir"])
    # NOTE(review): CONFIGURE/MAKE are run via os.system with values
    # interpolated into a shell string — safe only for trusted parms.
    os.system(CONFIGURE %
              (WITH_SPTK_SEARCH_PATH, WITH_HTS_SEARCH_PATH,
               WITH_HTS_ENGINE_SEARCH_PATH, SPEAKER, DATASET,
               parms["pitchmin"], parms["pitchmax"], parms["voice"]))
    os.system(MAKE)

    #COPY FILTERS FOR HTSME_ENGINE...
    print("COPYING FILTERS")
    # Strip the ".<ext>" suffix (extension plus dot) from the destination name.
    for fn in glob(
            os.path.join(FILTERS_SUBDIR, ".".join(["*", HTSME_ENGINE_EXT]))):
        destfn = os.path.join(
            MODELS_SUBDIR,
            os.path.basename(fn)[:-len(HTSME_ENGINE_EXT) - 1])
        print(fn, destfn)
        shutil.copy(fn, destfn)
Example #19
0
 def next(self):
     """Save current edits, advance to the next worklist item and seed
     transcription/comment/pronunciation entries for unseen utterances."""
     self.save_data()
     if self.current_index >= len(self.worklist) - 1:
         return
     self.current_index += 1
     key = self.worklist[self.current_index][0]
     self.current_wordindex = self.worklist[self.current_index][1]
     self.current_utt = ttslab.fromfile(key)
     self.current_utt.fill_startendtimes()
     if key not in self.transcriptions:
         self.transcriptions[key] = self.current_utt["inputtext"]
     if key not in self.comments:
         self.comments[key] = ""
     if key not in self.pronuns:
         self.pronuns[key] = [" ".join(getpronun(w, self.voice)) for w in self.current_utt.gr("SylStructure")]
Example #20
0
 def next(self):
     """Save current edits, advance to the next worklist item and seed
     transcription/comment/pronunciation entries for unseen utterances."""
     self.save_data()
     if self.current_index >= len(self.worklist) - 1:
         return
     self.current_index += 1
     key = self.worklist[self.current_index][0]
     self.current_wordindex = self.worklist[self.current_index][1]
     self.current_utt = ttslab.fromfile(key)
     self.current_utt.fill_startendtimes()
     if key not in self.transcriptions:
         self.transcriptions[key] = self.current_utt["text"]
     if key not in self.comments:
         self.comments[key] = ""
     if key not in self.pronuns:
         self.pronuns[key] = [" ".join(getpronun(w, self.phmap)) for w in self.current_utt.gr("SylStructure")]
def make_units(voice, utt_dir):
    """Run the synthesizer's "feats" step on every pickled utterance in
    utt_dir to build the Unit-level structure; return the utterances.
    """
    print("MAKING UNITS..")
    uttfiles = sorted(glob(os.path.join(utt_dir, "*." + UTT_EXT)))
    processed = []
    for uttfile in uttfiles:
        print(uttfile)
        utt = voice.synthesizer(ttslab.fromfile(uttfile), ("feats", None))
        processed.append(utt)
    return processed
Example #22
0
def make_units(voice, utt_dir):
    """Apply the synthesizer's 'targetunits' step to every pickled
    utterance found in utt_dir; return the processed utterances.
    """
    print("MAKING UNITS..")
    uttfiles = sorted(glob(os.path.join(utt_dir, "*." + UTT_EXT)))
    processed = []
    for uttfile in uttfiles:
        print(uttfile)
        utt = ttslab.fromfile(uttfile)
        #DEMITASSE voice needs resynth method..
        processed.append(voice.synthesizer(utt, "targetunits"))
    return processed
Example #23
0
def make_voice(langs, synthfile="frontend"):
    """Assemble a (possibly multilingual) Voice and pickle it.

    The first language in `langs` is the "main" language and supplies
    the Voice class; subsequent languages contribute extra pronunciation
    resources keyed on the language name.

    Fixed: the original used exec("from ttslab.lang.<lang> import Voice"),
    which does not reliably bind a name in function scope on Python 3;
    importlib.import_module is used instead.
    """
    import importlib
    Voice = None
    pronun = {}
    for i, lang in enumerate(langs):
        if i == 0:
            Voice = importlib.import_module("ttslab.lang." + lang).Voice
            langpref = "main"
        else:
            langpref = lang
        pronun[langpref] = {}
        pronun[langpref]["phoneset"] = ttslab.fromfile(langpref +
                                                       PHONESET_FILESUFFIX)
        pronun[langpref]["pronundict"] = ttslab.fromfile(langpref +
                                                         PRONUNDICT_FILESUFFIX)
        pronun[langpref]["pronunaddendum"] = ttslab.fromfile(
            langpref + PRONUNADDENDUM_FILESUFFIX)
        pronun[langpref]["g2p"] = ttslab.fromfile(langpref + G2P_FILESUFFIX)
    if synthfile == "frontend":
        # Front-end-only voice: no synthesizer attached.
        voice = Voice(pronun=pronun, synthesizer=None)
        ttslab.tofile(voice, "frontend.voice.pickle")
    else:
        synthesizer = ttslab.fromfile(synthfile)
        voice = Voice(pronun=pronun, synthesizer=synthesizer)
        ttslab.tofile(voice, "voice.pickle")
Example #24
0
 def __init__(self, worklist, voice, previous):
     """Restore a previous session's state and load the first utterance."""
     self.transcriptions, self.pronuns, self.comments = previous
     self.voice = voice
     self.worklist = worklist
     self.current_index = 0
     entry = self.worklist[0]
     key = entry[0]
     self.current_wordindex = entry[1]
     self.current_utt = ttslab.fromfile(key)
     self.current_utt.fill_startendtimes()
     #SET STATE: seed defaults only for keys not restored from `previous`
     if key not in self.transcriptions:
         self.transcriptions[key] = self.current_utt["inputtext"]
     if key not in self.comments:
         self.comments[key] = ""
     if key not in self.pronuns:
         self.pronuns[key] = [" ".join(getpronun(w, self.voice)) for w in self.current_utt.gr("SylStructure")]
Example #25
0
def scores(vfname, method="dtw"):
    """Compute per-utterance distance scores for voice `vfname`.

    method: "linear", "dtw" or "alignlogl".  Output utterances are
    written to UTTDIR2; input comes from UTTDIR on the first run and
    from UTTDIR2 on subsequent runs.

    Fixed two defects in the original:
    - the worklists were passed to map() whose (lazy) result was never
      consumed, so nothing ran on Python 3; replaced with explicit loops.
    - the "alignlogl" branch referenced an undefined name `v`; the voice
      is now loaded from vfname.
    """
    try:
        os.makedirs(UTTDIR2)
        indirname = UTTDIR
        print("Using utts in %s as input..." % UTTDIR)
    except OSError:
        # UTTDIR2 already exists: continue processing its contents.
        indirname = UTTDIR2
        print("Using utts in %s as input..." % UTTDIR2)
    uttfnames = sorted(glob(os.path.join(indirname, "*")))
    if method == "linear":
        for ufname in uttfnames:
            uttlindistcalc([vfname, ufname])
    elif method == "dtw":
        for ufname in uttfnames:
            uttdtwdistcalc([vfname, ufname])
    elif method == "alignlogl":
        v = ttslab.fromfile(vfname)  # was undefined in the original
        for uttfn in uttfnames:
            print(uttfn)
            u = ttslab.fromfile(uttfn)
            ul = sl.Utterance(os.path.join(RECDIR, u["file_id"] + ".rec"))
            u = parse_logl_from_recs(u, ul, v.phoneset)
            ttslab.tofile(u, os.path.join(UTTDIR2, u["file_id"] + ".utt.pickle"))
Example #26
0
def scores(vfname, method="dtw"):
    """Compute per-utterance distance scores for voice `vfname`.

    method: "linear", "dtw" or "alignlogl".  Output utterances are
    written to UTTDIR2; input comes from UTTDIR on the first run and
    from UTTDIR2 on subsequent runs.

    Fixed two defects in the original:
    - the worklists were passed to map() whose (lazy) result was never
      consumed, so nothing ran on Python 3; replaced with explicit loops.
    - the "alignlogl" branch referenced an undefined name `v`; the voice
      is now loaded from vfname.
    """
    try:
        os.makedirs(UTTDIR2)
        indirname = UTTDIR
        print("Using utts in %s as input..." % UTTDIR)
    except OSError:
        # UTTDIR2 already exists: continue processing its contents.
        indirname = UTTDIR2
        print("Using utts in %s as input..." % UTTDIR2)
    uttfnames = sorted(glob(os.path.join(indirname, "*")))
    if method == "linear":
        for ufname in uttfnames:
            uttlindistcalc([vfname, ufname])
    elif method == "dtw":
        for ufname in uttfnames:
            uttdtwdistcalc([vfname, ufname])
    elif method == "alignlogl":
        v = ttslab.fromfile(vfname)  # was undefined in the original
        for uttfn in uttfnames:
            print(uttfn)
            u = ttslab.fromfile(uttfn)
            ul = sl.Utterance(os.path.join(RECDIR, u["file_id"] + ".rec"))
            u = parse_logl_from_recs(u, ul, v.pronun["main"]["phoneset"].features["closure_phone"], v.phonemap)
            ttslab.tofile(u, os.path.join(UTTDIR2, u["file_id"] + ".utt.pickle"))
Example #27
0
def main():
    """CLI entry point: load a voice and run the requested alignment step."""
    handlers = {"auto": auto,
                "to_textgrid": to_textgrid,
                "from_textgrid": from_textgrid,
                "alignments_from_textgrid": alignments_from_textgrid}
    try:
        if len(sys.argv) < 3:
            raise CLIException
        voice = ttslab.fromfile(sys.argv[1])
        handler = handlers.get(sys.argv[2])
        if handler is None:
            raise CLIException
        handler(voice)
    except CLIException:
        print("USAGE: ttslab_align.py [VOICEFILE] [auto | to_textgrid | from_textgrid | alignments_from_textgrid]")
Example #28
0
def make_voice(synthfile=SYNTHESIZER_FILE, pitchmodelfile=PITCHMODEL_FILE):
    """Assemble a Voice for the language named by the current directory,
    attach a synthesizer and pitch model, and pickle it to VOICE_FILE.

    Fixed: the original used exec("from ttslab.lang.<lang> import Voice"),
    which does not reliably bind a name in function scope on Python 3;
    importlib.import_module is used instead.
    """
    import importlib
    # The directory name is the language identifier by convention.
    langs = [os.path.basename(os.getcwd())]
    Voice = None
    pronun = {}
    for i, lang in enumerate(langs):
        if i == 0:
            Voice = importlib.import_module("ttslab.lang." + lang).Voice
            langpref = "main"
        else:
            langpref = lang
        pronun[langpref] = {}
        pronun[langpref]["phoneset"] = ttslab.fromfile(langpref +
                                                       PHONESET_FILESUFFIX)
        pronun[langpref]["pronundict"] = ttslab.fromfile(langpref +
                                                         PRONUNDICT_FILESUFFIX)
        pronun[langpref]["pronunaddendum"] = ttslab.fromfile(
            langpref + PRONUNADDENDUM_FILESUFFIX)
        pronun[langpref]["g2p"] = ttslab.fromfile(langpref + G2P_FILESUFFIX)
    synthesizer = ttslab.fromfile(synthfile)
    pitchmodel = ttslab.fromfile(pitchmodelfile)
    voice = Voice(pronun=pronun, synthesizer=synthesizer)
    voice.pitchmodel = pitchmodel
    ttslab.tofile(voice, VOICE_FILE)
Example #29
0
    output.close()
    output = StringIO()
    mpld3.save_html(fig2, output)
    pitch_html = output.getvalue()
    output.close()
    output = StringIO()
    mpld3.save_html(fig3, output)
    wave_html = output.getvalue()
    output.close()
    plt.close(fig1)
    plt.close(fig2)
    plt.close(fig3)
    return syl_html, pitch_html, wave_html


if __name__ == '__main__':
    # Visualise one pickled utterance as three standalone HTML figures.
    if len(sys.argv) < 2:
        print("USAGE: uttviz_d3.py UTTFNAME")
        sys.exit()
    uttfname = sys.argv[1]

    utt = ttslab.fromfile(uttfname)
    figures = draw_sylstruct_graph_pitch_waveform(utt)
    basename = os.path.basename(uttfname)
    for fig, suffix in zip(figures,
                           ("_sylstructure.html", "_pitch.html", "_wave.html")):
        mpld3.save_html(fig, open(basename + suffix, "w"))
Example #30
0
                        default=DEFSTRESSTONE,
                        help="default stress/tone")
    args = parser.parse_args()

    phonemap = None
    if args.outphonemapfn is not None:
        phonemap = {}
        with codecs.open(args.outphonemapfn, encoding="utf-8") as infh:
            for line in infh:
                a, b = line.split()
                if args.mapreverse:
                    a, b = (b, a)
                phonemap[a] = b

    defstresstone = args.defstresstone
    phset = ttslab.fromfile(args.phonesetfn)
    inphmap = dict([(v, k) for k, v in phset.map.iteritems()])

    for line in sys.stdin:
        fields = unicode(line, encoding="utf-8").split()
        word, pos, stresspat, sylspec = fields[:4]
        assert len(stresspat) == len(sylspec)
        phones = map(lambda x: inphmap[x], fields[4:])
        #print(word, pos, stresspat, sylspec)
        #print(phones)

        i = 0
        syls = []
        for n, stress in zip([int(slen) for slen in sylspec], stresspat):
            syl = phones[i:i + n]
            i += n
#!/usr/bin/env python
# -*- coding: utf-8 -*-
""" Generates a list of transcriptions that changed during a
    speechbrowser session.
"""
from __future__ import unicode_literals, division, print_function  #Py2

__author__ = "Daniel van Niekerk"
__email__ = "*****@*****.**"

import os
import sys
import codecs

import ttslab

if __name__ == "__main__":
    # The pickled session file holds (transcriptions, pronunciations,
    # comments), each keyed on utterance-pickle paths.
    transcrlist, pronunlist, commentlist = ttslab.fromfile(sys.argv[1])
    # Fixed: removed the unused local `pronun = {}` from the original.
    transcr = {}
    for k in sorted(transcrlist):
        u = ttslab.fromfile(k)
        #print(u["text"], transcrlist[k])
        # Keep only entries whose transcription changed during the session.
        if u["text"] != transcrlist[k]:
            transcr[os.path.basename(k)[:-len(".utt.pickle")]] = transcrlist[k]
    with codecs.open("newutts.data", "w", encoding="utf-8") as outfh:
        for k in sorted(transcr):
            outfh.write('( %s "%s" )\n' % (k, transcr[k]))
Example #32
0
 def loadvoice(self, name, voice_location):
     """Deserialise a voice from disk and register it under `name`."""
     log.info("Loading voice from file '%s'" % (voice_location))
     voice = ttslab.fromfile(voice_location)
     self.voices[name] = voice
     log.info("Voice '%s' loaded." % (name))
Example #33
0
                        type=str,
                        help="aligned Utterance file (.utt.pickle)")
    parser.add_argument('f0fn',
                        metavar='F0FN',
                        type=str,
                        help="corresponding F0 file (.track.pickle)")
    parser.add_argument(
        '--qtaspecsfn',
        metavar='QTASPECSFN',
        type=str,
        help="qTA parameter search config: ranges and quantisation (.json)")
    parser.add_argument(
        '--extract',
        action="store_true",
        help=
        "extract new parameters and plot instead of using existing annotations."
    )
    args = parser.parse_args()

    utt = ttslab.fromfile(args.uttfn)
    f0 = ttslab.fromfile(args.f0fn)
    if args.extract:
        utt.fill_startendtimes()

    if args.qtaspecsfn:
        with open(args.qtaspecsfn) as infh:
            qtaspecs = json.load(infh)
        ttslab.pitchsynth.qta.utt_plot(utt, f0, qtaspecs, args.extract)
    else:
        ttslab.pitchsynth.qta.utt_plot(utt, f0, annotate=args.extract)
Example #34
0
#!/usr/bin/env python
# -*- coding: utf-8 -*-
""" Print utterance structure...
"""
from __future__ import unicode_literals, division, print_function #Py2

__author__ = "Daniel van Niekerk"
__email__ = "*****@*****.**"

import sys

import ttslab

if __name__ == '__main__':
    # Print a pickled utterance's structure (relies on its __str__/__repr__).
    if len(sys.argv) < 2:
        print("USAGE: uttplay.py UTTFNAME")
        sys.exit(1)
    uttfn = sys.argv[1]

    print(ttslab.fromfile(uttfn))
def add_feats_to_utt(args):
    """Attach half-phone acoustic features (LPC coefs, residuals, join
    coefficients, durations) to an utterance's Unit relation and return it.

    args: a (utt, lpc_dir, joincoef_dir, f0_dir) tuple — packed as one
    argument so the function can be used with map()-style parallelism.
    """
    u, lpc_dir, joincoef_dir, f0_dir = args

    file_id = u["file_id"]
    print("Processing:", file_id)
    u.fill_startendtimes()

    # Load the per-file acoustic tracks produced by earlier pipeline steps.
    lpctrack = Track()
    lpctrack.load_track(".".join([os.path.join(lpc_dir, file_id), LPC_EXT]))
    restrack = Track()
    restrack.load_wave(".".join([os.path.join(lpc_dir, file_id), RES_EXT]))
    jointrack = ttslab.fromfile(".".join([os.path.join(joincoef_dir, file_id), JOIN_EXT]))
    f0track = Track()
    f0track.load_track(".".join([os.path.join(f0_dir, file_id), F0_EXT]))

    #get boundarytimes:
    # Each segment is split in two (at "cl_end" if present, else at its
    # midpoint), yielding [start, split, end] triples and two durations
    # per segment — i.e. half-phone units.
    boundarytimes = []
    durations = []
    starttime = 0.0
    for seg in u.get_relation("Segment"):
        endtime = float(seg["end"])
        if "cl_end" in seg:
            splittime = float(seg["cl_end"])
        else:
            splittime = (endtime + starttime) / 2
            #TODO: should still add 25% split if diphthong...
        boundarytimes.append([starttime, splittime, endtime])
        durations.extend([splittime - starttime, endtime - splittime])
        starttime = endtime

    #convert boundtimes into sample ranges (and flatten):
    lpcsampleranges = []
    f0sampleranges = []
    joinsamples = []

    #DEMITASSE: If not pruning pau halfphones:
    # for bounds in boundarytimes:
    #     lpcsampleranges.extend([lpctrack.get_index_at(bounds[0]),
    #                             lpctrack.get_index_at(bounds[1])])
    #     joinsamples.extend([jointrack.get_sample_at(bounds[0]),
    #                         jointrack.get_sample_at(bounds[1])])
    # lpcsampleranges.append(len(lpctrack))
    # joinsamples.append(jointrack.get_sample_at(len(jointrack)))

    #DEMITASSE: If pruning pau halfphones:
    # Drop the first and last half-phone (leading/trailing pause halves).
    durations = durations[1:-1]
    for i, bounds in enumerate(boundarytimes):
        if i == 0:
            lpcsampleranges.append(lpctrack.index_at(bounds[1]))
            f0sampleranges.append(f0track.index_at(bounds[1]))
            # NOTE(review): bounds[1] here is a *time* value used directly
            # as an index into jointrack.values, while the earlier-example
            # code indexes via jointrack.index_at(time) — confirm this is
            # intentional (same at the extend() below).
            joinsamples.append(jointrack.values[bounds[1]])
        else:
            lpcsampleranges.extend([lpctrack.index_at(bounds[0]),
                                    lpctrack.index_at(bounds[1])])
            f0sampleranges.extend([f0track.index_at(bounds[0]),
                                   f0track.index_at(bounds[1])])
            joinsamples.extend([jointrack.values[bounds[0]],
                                jointrack.values[bounds[1]]])

    #get pitchperiods at lpc indices
    # Prepend 0.0 so diff() yields a period for the very first frame.
    lpctimes = np.concatenate(([0.0], lpctrack.times))
    pitchperiod = np.diff(lpctimes)

    units = u.get_relation("Unit").as_list()
    
    assert len(units) == len(lpcsampleranges) - 1
    # Walk consecutive boundary pairs alongside the units they delimit.
    # NOTE(review): fti0/fti1 (F0 sample ranges) are unpacked but never
    # used in this loop body — confirm whether that is intentional.
    for jc0, jc1, lti0, lti1, fti0, fti1, dur, i in zip(joinsamples[:-1], joinsamples[1:],
                                                        lpcsampleranges[:-1], lpcsampleranges[1:],
                                                        f0sampleranges[:-1], f0sampleranges[1:],
                                                        durations,
                                                        units):
#        print(i["name"], "lpctrack[%s:%s]" % (lti0, lti1), "len(lpctrack)=%s" % len(lpctrack))
        i["left-joincoef"] = jc0
        i["right-joincoef"] = jc1
        i["lpc-coefs"] = lpctrack.slice(lti0, lti1, copy=True) #like python indexing/slicing
        if lti0 == 0:
            i["lpc-coefs"].starttime = 0.0
        else:
            i["lpc-coefs"].starttime = lpctrack.times[lti0 - 1]
        i["lpc-coefs"].zero_starttime()
        i["dur"] = dur
        #For windowfactor=2 (save only samples and assume 16kHz)
        # Residual slice is widened by one pitch period on both sides.
        i["residuals"] = restrack.slice(restrack.index_at(lpctrack.times[lti0] - pitchperiod[lti0]),
                                        restrack.index_at(lpctrack.times[lti1] + pitchperiod[lti0])).values
    return u
Example #36
0
        starttime = endtime

    return lab


if __name__ == "__main__":
    # Expected arguments: mode switch, voice pickle, utterance pickle.
    if len(sys.argv) < 4:
        print("usage: utt2lab.py [mono|full] [VOICEFILE] [INFILENAME]")
        sys.exit(1)
    switch, voicefile, infilename = sys.argv[1:4]

    #Load voice and utt and link...
    voice = ttslab.fromfile(voicefile)
    utt = ttslab.fromfile(infilename)
    utt.voice = voice

    if switch == "mono":
        lab = utt2lab_mono(utt)
    elif switch == "full":
        lab = utt2lab_full(utt)
    else:
        print("Invalid switch: %s" % (switch))
        sys.exit(1)
import sys

import ttslab

PHONESETFN = "phoneset.pickle"

ALL_CONTEXTS = {"LL": "%s^*",
                "L": "*^%s-*",
                "C": "*-%s+*",
                "R": "*+%s=*",
                "RR": "*=%s@*"}
VOWEL_CONTEXTS = {"C-Syl": "*|%s/C:*"}

if __name__ == "__main__":
    try:
        phset = ttslab.fromfile(PHONESETFN)
    except IOError:
        print("Could not find file: '%s'" % (PHONESETFN))

    #get all feature categories:
    categories = set()
    for phn in phset.phones:
        categories.update(phset.phones[phn])

    #get feature categories involving vowels:
    vcategories = set()
    for phn in phset.phones:
        if "vowel" in phset.phones[phn]:
            vcategories.update(phset.phones[phn])

    #do all contexts:
Example #38
0
def multihtsfrontend():
    """Build a multilingual (code-switching) HTS front-end voice and pickle
    it to 'frontend.multihts.voice.pickle'.

    All resources are mandatory except the English pronunciation addendum:
    when ENGPRONUNADDENDUM_FILE cannot be read, the voice is built with an
    empty addendum instead (matching the original try/except fallback).
    """
    from ttslab.defaultvoice import LwaziMultiHTSVoice
    from ttslab.synthesizer_htsme import SynthesizerHTSME

    def make_voice(engpronunaddendum):
        # Single construction point: the only thing that varies between the
        # normal and fallback paths is the English addendum.
        return LwaziMultiHTSVoice(phoneset=ttslab.fromfile(PHONESET_FILE),
                                  g2p=ttslab.fromfile(G2P_FILE),
                                  pronundict=ttslab.fromfile(PRONUNDICT_FILE),
                                  pronunaddendum=ttslab.fromfile(PRONUNADDENDUM_FILE),
                                  engphoneset=ttslab.fromfile(ENGPHONESET_FILE),
                                  engg2p=ttslab.fromfile(ENGG2P_FILE),
                                  engpronundict=ttslab.fromfile(ENGPRONUNDICT_FILE),
                                  engpronunaddendum=engpronunaddendum,
                                  synthesizer=SynthesizerHTSME(voice=None, models_dir=None))

    try:
        voice = make_voice(ttslab.fromfile(ENGPRONUNADDENDUM_FILE))
    except IOError:
        # Addendum file missing/unreadable: retry without it. Any IOError
        # from a mandatory resource will simply be raised again here, as in
        # the original duplicated-call version.
        voice = make_voice({})
    ttslab.tofile(voice, "frontend.multihts.voice.pickle")
            currentphrase["name"] = "BB"
            currentphrase.add_daughter(word)
        elif prevseg["name"] == "pau" and (prevseg["end"] - prevseg["start"]) < thresh:
            prevseg.remove_content()
            currentphrase.add_daughter(word)
        else:
            currentphrase.add_daughter(word)
    for phrase in phraserel:
        phrase["start"] = phrase.first_daughter["start"]
        phrase["end"] = phrase.last_daughter["end"]
    return u


if __name__ == "__main__":
    uttin = sys.argv[1]
    try:
        thresh = float(sys.argv[2])  # in seconds
    except IndexError:
        thresh = PAUSE_LEN_THRESH
    try:
        uttoutdir = sys.argv[3]
    except IndexError:
        uttoutdir = os.getcwd()

    u = ttslab.fromfile(uttin)
    u.fill_startendtimes()
    u = remphraserel(u)
    u = phraserelfrompauses(u, thresh)

    ttslab.tofile(u, os.path.join(uttoutdir, u["file_id"] + ".utt.pickle"))
Example #40
0
 def loadvoice(self, name, voice_location):
     """Unpickle the voice stored at *voice_location* and register it in
     self.voices under *name*."""
     log.info("Loading voice from file '%s'" % (voice_location))
     loaded = ttslab.fromfile(voice_location)
     self.voices[name] = loaded
     log.info("Voice '%s' loaded." % (name))
Example #41
0
        starttime = endtime

    return lab


if __name__ == "__main__":
    try:
        switch = sys.argv[1]
        voicefile = sys.argv[2]
        infilename = sys.argv[3]
    except IndexError:
        print("usage: utt2lab.py [mono|full] [VOICEFILE] [INFILENAME]")
        sys.exit(1)

    #Load voice and utt and link...
    voice = ttslab.fromfile(voicefile)
    utt = ttslab.fromfile(infilename)
    utt.voice = voice

    if switch == "mono":
        #t1 = time.time()
        lab = utt2lab_mono(utt)
        #print("Time: " + str(time.time() - t1))
    elif switch == "full":
        #t1 = time.time()
        lab = utt2lab_full(utt)
        #print("Time: " + str(time.time() - t1))
    else:
        print("Invalid switch: %s" % (switch))
        sys.exit(1)
    vectors = voice.pitchmodel(utt, ("feats", None))["sylpitchfeats"]
    for vector, syl in zip(vectors, utt.gr("Syllable")):
        vector.extend([syl["qta_endheight"], syl["qta_slope"]])
    return vectors


if __name__ == "__main__":
    import argparse
    parser = argparse.ArgumentParser(
        description=__doc__,
        formatter_class=argparse.ArgumentDefaultsHelpFormatter)
    parser.add_argument(
        'voicefn',
        metavar='VOICEFN',
        type=str,
        help="Voice containing PitchModel implementation (.voice.pickle)")
    parser.add_argument(
        'uttfn',
        metavar='UTTFN',
        type=str,
        help=
        "annotated Utterance file, i.e. containing qTA parameters (.utt.pickle)"
    )
    args = parser.parse_args()

    voice = ttslab.fromfile(args.voicefn)
    utt = ttslab.fromfile(args.uttfn)

    for vector in process_utt(voice, utt):
        print(" ".join(map(str, vector)))
Example #43
0
def prepredict(wordsfn, g2p, skipwords):
    """Predict pronunciations for every word listed in *wordsfn* (UTF-8,
    one word per line), skipping any entry present in *skipwords*.

    Prints a progress line per word and returns a dict mapping each word
    to g2p.predict_word(word).
    """
    with codecs.open(wordsfn, encoding="utf-8") as infh:
        words = []
        for line in infh.readlines():
            stripped = line.strip()
            if stripped not in skipwords:
                words.append(stripped)
    pronundict = {}
    numwords = len(words)
    for i, word in enumerate(words):
        print(("%s/%s: %s" % (i + 1, numwords, word)).encode("utf-8"))
        pronundict[word] = g2p.predict_word(word)
    return pronundict


if __name__ == "__main__":
    phset = ttslab.fromfile(PHSET_FILE)
    phmap = dict([(v, k) for k, v in phset.map.items()])
    assert len(phmap) == len(phset.map), "mapping not one-to-one..."
    #load
    #MAIN
    try:
        pronundict = PronunciationDictionary().fromtextfile(PRONUNDICT_INFN,
                                                            phonemap=phmap)
    except IOError:
        print("WARNING: Could not find '%s'" % PRONUNDICT_INFN)
        pronundict = PronunciationDictionary().fromsimpletextfile(
            DICT_INFN, phonemap=phmap)
    #ADDENDUM
    try:
        addendum = PronunciationDictionary().fromtextfile(ADDENDUM_INFN,
                                                          phonemap=phmap)
Example #44
0
 def _load_unitcatalogue(self, unitcataloguefile):
     """Unpickle the unit catalogue at *unitcataloguefile* onto this instance."""
     catalogue = ttslab.fromfile(unitcataloguefile)
     self.unitcatalogue = catalogue
Example #45
0
    def on_button_playwordorig_clicked(self, obj):
        """GTK handler: play self.origwordcontextwav."""
        self.origwordcontextwav.play()

    def on_button_playwordsynth_clicked(self, obj):
        """GTK handler: play self.synthwordcontextwav."""
        self.synthwordcontextwav.play()

    def on_toolbutton_open_clicked(self, obj):
        """GTK handler: prompt for a worklist file; on OK, rebuild the
        corpus view from it, then refresh both views either way."""
        dialog_buttons = (gtk.STOCK_CANCEL, gtk.RESPONSE_CANCEL,
                          gtk.STOCK_OPEN, gtk.RESPONSE_OK)
        chooser = gtk.FileChooserDialog(title=None,
                                        action=gtk.FILE_CHOOSER_ACTION_OPEN,
                                        buttons=dialog_buttons)
        chooser.set_current_folder(os.getcwd())
        answer = chooser.run()
        if answer == gtk.RESPONSE_OK:
            chosen = chooser.get_filename()
            self.corpusview = CorpusView(loadworklist(chosen), self.phmap)
        elif answer == gtk.RESPONSE_CANCEL:
            print('Closed, no files selected')
        chooser.destroy()
        # Refresh both panes regardless of the dialog outcome.
        self.update_uttview()
        self.update_wordview()

if __name__ == "__main__":
    voice = ttslab.fromfile(sys.argv[1])
    app = SpeechbrowserApp(voice.phonemap)
    gtk.main()
Example #46
0
# encoding: utf-8
import ttslab

# Smoke test: load a built voice pickle, run the complete "text-to-wave"
# pipeline on a sample sentence, and save the resulting waveform.
voice = ttslab.fromfile("wordus.voice.pickle")
utt = voice.synthesize(u'Mea nsia na hunu ewiem nsakrayɛ aa ɛwɔ Kumasi',
                       "text-to-wave")
utt["waveform"].write("test.wav")
            currentphrase.add_daughter(word)
        elif prevseg["name"] == "pau" and (prevseg["end"] -
                                           prevseg["start"]) < thresh:
            prevseg.remove_content()
            currentphrase.add_daughter(word)
        else:
            currentphrase.add_daughter(word)
    for phrase in phraserel:
        phrase["start"] = phrase.first_daughter["start"]
        phrase["end"] = phrase.last_daughter["end"]
    return u


if __name__ == "__main__":
    uttin = sys.argv[1]
    try:
        thresh = float(sys.argv[2])  #in seconds
    except IndexError:
        thresh = PAUSE_LEN_THRESH
    try:
        uttoutdir = sys.argv[3]
    except IndexError:
        uttoutdir = os.getcwd()

    u = ttslab.fromfile(uttin)
    u.fill_startendtimes()
    u = remphraserel(u)
    u = phraserelfrompauses(u, thresh)

    ttslab.tofile(u, os.path.join(uttoutdir, u["file_id"] + ".utt.pickle"))
Example #48
0
#!/usr/bin/env python
from __future__ import division

__author__ = "Daniel van Niekerk"
__email__ = "*****@*****.**"

import sys

import ttslab

from qta3 import plotstuff

if __name__ == "__main__":
    prefix = sys.argv[1]
    utt = ttslab.fromfile(sys.argv[2])
    reff0 = ttslab.fromfile(sys.argv[3])
    qtaf0 = ttslab.fromfile(sys.argv[4])

    plotstuff(utt, reff0, qtaf0, prefix=prefix)

Example #49
0
#!/usr/bin/env python
# -*- coding: utf-8 -*-
""" Print utterance structure...
"""
from __future__ import unicode_literals, division, print_function #Py2

__author__ = "Daniel van Niekerk"
__email__ = "*****@*****.**"

import sys

import numpy as np

import ttslab
import ufuncs_analysis
import ttslab_dtw

if __name__ == '__main__':
    uttfn = sys.argv[1]

    # Extract MCEPs at a 1 ms shift and DTW-align the track with itself.
    utt = ttslab.fromfile(uttfn)
    mceptrack = ufuncs_analysis.utt_mceps(utt, shift=0.001)
    dtwalignment = ttslab_dtw.dtw_align(mceptrack.values, mceptrack.values)
#!/usr/bin/env python
# -*- coding: utf-8 -*-
""" Generates a list of transcriptions that changed during a
    speechbrowser session.
"""
from __future__ import unicode_literals, division, print_function #Py2

__author__ = "Daniel van Niekerk"
__email__ = "*****@*****.**"

import os
import sys
import codecs

import ttslab

if __name__ == "__main__":
    transcrlist, pronunlist, commentlist = ttslab.fromfile(sys.argv[1])
    transcr = {}
    pronun = {}
    for k in sorted(transcrlist):
        u = ttslab.fromfile(k)
        #print(u["text"], transcrlist[k])
        if u["text"] != transcrlist[k]:
            transcr[os.path.basename(k)[:-len(".utt.pickle")]] = transcrlist[k]
    with codecs.open("newutts.data", "w", encoding="utf-8") as outfh:
        for k in sorted(transcr):
            outfh.write('( %s "%s" )\n' % (k, transcr[k]))
Example #51
0
    def on_button_playwordorig_clicked(self, obj):
        """GTK handler: play self.origwordcontextwav."""
        self.origwordcontextwav.play()

    def on_button_playwordsynth_clicked(self, obj):
        """GTK handler: play self.synthwordcontextwav."""
        self.synthwordcontextwav.play()

    def on_toolbutton_open_clicked(self, obj):
        """GTK handler: choose a worklist file; on OK, rebuild the corpus
        view with this app's voice, then refresh both views."""
        chooser = gtk.FileChooserDialog(title=None,
                                        action=gtk.FILE_CHOOSER_ACTION_OPEN,
                                        buttons=(gtk.STOCK_CANCEL,
                                                 gtk.RESPONSE_CANCEL,
                                                 gtk.STOCK_OPEN,
                                                 gtk.RESPONSE_OK))
        chooser.set_current_folder(os.getcwd())
        outcome = chooser.run()
        if outcome == gtk.RESPONSE_OK:
            selected = chooser.get_filename()
            self.corpusview = CorpusView(loadworklist(selected), self.voice)
        elif outcome == gtk.RESPONSE_CANCEL:
            print('Closed, no files selected')
        chooser.destroy()
        # Always refresh the utterance and word panes.
        self.update_uttview()
        self.update_wordview()

if __name__ == "__main__":
    voice = ttslab.fromfile(sys.argv[1])
    app = SpeechbrowserApp(voice)
    gtk.main()
Example #52
0
NASAL = set(["manner_nasal"])
APPROXIMANT = set(["manner_approximant", "manner_trill"])

SHORT = set(["duration_short"])
LONG = set(["duration_long"])
DIPH = set(["duration_diphthong"])

VOICED = set(["vowel", "voiced"])

if __name__ == "__main__":
    try:
        voicefn = sys.argv[1]
    except IndexError:
        voicefn = None
    try:
        voice = ttslab.fromfile(voicefn or VOICEFN)
    except IOError:
        print("Could not find file: '%s'" % (VOICEFN))
        sys.exit(1)

    for lang in ["main"] + [k for k in voice.pronun if k != "main"]:
        phset = voice.pronun[lang]["phoneset"]

        for phn in phset.phones:
            phnfeats = phset.phones[phn]

            if lang == "main":
                p = voice.phonemap[phn]
            else:
                p = voice.phonemap[lang + "_" + phn]
Example #53
0
#!/usr/bin/env python
# -*- coding: utf-8 -*-
""" Save waveform embedded in utt...
"""
from __future__ import unicode_literals, division, print_function #Py2

__author__ = "Daniel van Niekerk"
__email__ = "*****@*****.**"

import sys

import ttslab

WAV_EXT = "wav"

if __name__ == '__main__':
    # First argument (utterance pickle) is required.
    if len(sys.argv) < 2:
        print("USAGE: utt2textgrid.py UTTFNAME [WAVEFNAME]")
        sys.exit()

    utt = ttslab.fromfile(sys.argv[1])
    # Optional second argument names the output; default is <file_id>.wav
    if len(sys.argv) > 2:
        wavfn = sys.argv[2]
    else:
        wavfn = ".".join([utt["file_id"], WAV_EXT])

    utt["waveform"].write(wavfn)
Example #54
0
def add_feats_to_utt(args):
    """Attach unit-selection features (LPC coefficients, residual samples
    and join coefficients) to each Unit of one utterance.

    args is a single tuple (u, lpc_dir, joincoef_dir, f0_dir) so the
    function can be used with map-style multiprocessing APIs. Returns the
    updated utterance u.
    """
    u, lpc_dir, joincoef_dir, f0_dir = args

    file_id = u["file_id"]
    print("Processing:", file_id)
    u.fill_startendtimes()
    # Units correspond one-to-one with words; copy word timings onto units.
    for unit, word in zip(u.gr("Unit"), u.gr("Word")):
        assert unit["name"] == word["name"]
        unit["start"] = word["start"]
        unit["end"] = word["end"]

    # Load the per-utterance acoustic tracks from their directories.
    lpctrack = Track()
    lpctrack.load_track(".".join([os.path.join(lpc_dir, file_id), LPC_EXT]))
    restrack = Track()
    restrack.load_wave(".".join([os.path.join(lpc_dir, file_id), RES_EXT]))
    jointrack = ttslab.fromfile(".".join(
        [os.path.join(joincoef_dir, file_id), JOIN_EXT]))
    f0track = Track()
    f0track.load_track(".".join([os.path.join(f0_dir, file_id), F0_EXT]))

    #get boundarytimes:
    # First unit contributes its start; every unit contributes its end,
    # giving len(units)+1 boundary times.
    boundarytimes = []
    for i, unit in enumerate(u.gr("Unit")):
        if i == 0:
            boundarytimes.append(unit["start"])
        boundarytimes.append(unit["end"])

    #convert boundtimes into sample ranges:
    lpcsampleranges = []
    f0sampleranges = []
    joinsamples = []
    for bound in boundarytimes:
        lpcsampleranges.append(lpctrack.index_at(bound))
        f0sampleranges.append(f0track.index_at(bound))
        joinsamples.append(jointrack.values[jointrack.index_at(bound)])

    #get pitchperiods at lpc indices
    # Prepend 0.0 so pitchperiod[i] is the time delta ending at times[i].
    lpctimes = np.concatenate(([0.0], lpctrack.times))
    pitchperiod = np.diff(lpctimes)

    units = u.get_relation("Unit").as_list()

    # One sample range per unit boundary pair.
    assert len(units) == len(lpcsampleranges) - 1
    # NOTE(review): fti0/fti1 (F0 sample ranges) are unpacked but never
    # used in this loop body — confirm whether F0 features were intended.
    for jc0, jc1, lti0, lti1, fti0, fti1, i in zip(joinsamples[:-1],
                                                   joinsamples[1:],
                                                   lpcsampleranges[:-1],
                                                   lpcsampleranges[1:],
                                                   f0sampleranges[:-1],
                                                   f0sampleranges[1:], units):
        #        print(i["name"], "lpctrack[%s:%s]" % (lti0, lti1), "len(lpctrack)=%s" % len(lpctrack))
        i["left-joincoef"] = jc0
        i["right-joincoef"] = jc1
        i["lpc-coefs"] = lpctrack.slice(
            lti0, lti1, copy=True)  #like python indexing/slicing
        # Slice start time: previous frame's time, or 0.0 for the first frame.
        if lti0 == 0:
            i["lpc-coefs"].starttime = 0.0
        else:
            i["lpc-coefs"].starttime = lpctrack.times[lti0 - 1]
        i["lpc-coefs"].zero_starttime()
        #For windowfactor=2 (save only samples and assume 16kHz)
        # NOTE(review): the right margin below reuses pitchperiod[lti0];
        # pitchperiod[lti1] looks intended — confirm before changing.
        i["residuals"] = restrack.slice(
            restrack.index_at(lpctrack.times[lti0] - pitchperiod[lti0]),
            restrack.index_at(lpctrack.times[lti1] + pitchperiod[lti0])).values
    return u
def add_feats_to_utt(args):
    """Attach halfphone unit-selection features (LPC coefficients, residual
    samples, join coefficients and durations) to the Units of one utterance.

    args is a single tuple (u, lpc_dir, joincoef_dir, f0_dir) so the
    function can be used with map-style multiprocessing APIs. Returns the
    updated utterance u.
    """
    u, lpc_dir, joincoef_dir, f0_dir = args

    file_id = u["file_id"]
    print("Processing:", file_id)
    u.fill_startendtimes()

    # Load the per-utterance acoustic tracks from their directories.
    lpctrack = Track()
    lpctrack.load_track(".".join([os.path.join(lpc_dir, file_id), LPC_EXT]))
    restrack = Track()
    restrack.load_wave(".".join([os.path.join(lpc_dir, file_id), RES_EXT]))
    jointrack = ttslab.fromfile(".".join(
        [os.path.join(joincoef_dir, file_id), JOIN_EXT]))
    f0track = Track()
    f0track.load_track(".".join([os.path.join(f0_dir, file_id), F0_EXT]))

    #get boundarytimes:
    # Each segment is split into two halfphones at "cl_end" when present,
    # otherwise at its midpoint.
    boundarytimes = []
    durations = []
    starttime = 0.0
    for seg in u.get_relation("Segment"):
        endtime = float(seg["end"])
        if "cl_end" in seg:
            splittime = float(seg["cl_end"])
        else:
            splittime = (endtime + starttime) / 2
            #TODO: should still add 25% split if diphthong...
        boundarytimes.append([starttime, splittime, endtime])
        durations.extend([splittime - starttime, endtime - splittime])
        starttime = endtime

    #convert boundtimes into sample ranges (and flatten):
    lpcsampleranges = []
    f0sampleranges = []
    joinsamples = []

    #DEMITASSE: If not pruning pau halfphones:
    # for bounds in boundarytimes:
    #     lpcsampleranges.extend([lpctrack.get_index_at(bounds[0]),
    #                             lpctrack.get_index_at(bounds[1])])
    #     joinsamples.extend([jointrack.get_sample_at(bounds[0]),
    #                         jointrack.get_sample_at(bounds[1])])
    # lpcsampleranges.append(len(lpctrack))
    # joinsamples.append(jointrack.get_sample_at(len(jointrack)))

    #DEMITASSE: If pruning pau halfphones:
    # Drop the leading/trailing pau halfphone durations.
    durations = durations[1:-1]
    for i, bounds in enumerate(boundarytimes):
        if i == 0:
            lpcsampleranges.append(lpctrack.index_at(bounds[1]))
            f0sampleranges.append(f0track.index_at(bounds[1]))
            # BUGFIX: bounds[] holds *times* (floats), not sample indices —
            # map time to index before indexing into the join-coefficient
            # values, as done for the LPC/F0 tracks above and in the
            # sibling implementation of this function.
            joinsamples.append(jointrack.values[jointrack.index_at(bounds[1])])
        else:
            lpcsampleranges.extend(
                [lpctrack.index_at(bounds[0]),
                 lpctrack.index_at(bounds[1])])
            f0sampleranges.extend(
                [f0track.index_at(bounds[0]),
                 f0track.index_at(bounds[1])])
            # BUGFIX: same time-vs-index fix as above.
            joinsamples.extend(
                [jointrack.values[jointrack.index_at(bounds[0])],
                 jointrack.values[jointrack.index_at(bounds[1])]])

    #get pitchperiods at lpc indices
    # Prepend 0.0 so pitchperiod[i] is the time delta ending at times[i].
    lpctimes = np.concatenate(([0.0], lpctrack.times))
    pitchperiod = np.diff(lpctimes)

    units = u.get_relation("Unit").as_list()

    # One sample range per unit boundary pair.
    assert len(units) == len(lpcsampleranges) - 1
    # NOTE(review): fti0/fti1 (F0 sample ranges) are unpacked but unused —
    # confirm whether F0 features were intended here.
    for jc0, jc1, lti0, lti1, fti0, fti1, dur, i in zip(
            joinsamples[:-1], joinsamples[1:], lpcsampleranges[:-1],
            lpcsampleranges[1:], f0sampleranges[:-1], f0sampleranges[1:],
            durations, units):
        #        print(i["name"], "lpctrack[%s:%s]" % (lti0, lti1), "len(lpctrack)=%s" % len(lpctrack))
        i["left-joincoef"] = jc0
        i["right-joincoef"] = jc1
        i["lpc-coefs"] = lpctrack.slice(
            lti0, lti1, copy=True)  #like python indexing/slicing
        # Slice start time: previous frame's time, or 0.0 for the first frame.
        if lti0 == 0:
            i["lpc-coefs"].starttime = 0.0
        else:
            i["lpc-coefs"].starttime = lpctrack.times[lti0 - 1]
        i["lpc-coefs"].zero_starttime()
        i["dur"] = dur
        #For windowfactor=2 (save only samples and assume 16kHz)
        # NOTE(review): the right margin reuses pitchperiod[lti0];
        # pitchperiod[lti1] looks intended — confirm before changing.
        i["residuals"] = restrack.slice(
            restrack.index_at(lpctrack.times[lti0] - pitchperiod[lti0]),
            restrack.index_at(lpctrack.times[lti1] + pitchperiod[lti0])).values
    return u
    except IOError:
        pass
    return pronundict

def prepredict(wordsfn, g2p, skipwords):
    """Predict pronunciations for the words in *wordsfn* (UTF-8, one word
    per line), skipping entries found in *skipwords*.

    Prints a progress line per word and returns a dict mapping each word
    to g2p.predict_word(word).
    """
    with codecs.open(wordsfn, encoding="utf-8") as infh:
        words = []
        for rawline in infh.readlines():
            candidate = rawline.strip()
            if candidate not in skipwords:
                words.append(candidate)

    pronundict = {}
    total = len(words)
    for count, word in enumerate(words, 1):
        print("%s/%s: %s" % (count, total, word))
        pronundict[word] = g2p.predict_word(word)
    return pronundict

if __name__ == "__main__":
    phset = ttslab.fromfile(PHSET_FILE)
    phmap = dict([(v, k) for k, v in phset.map.items()])
    assert len(phmap) == len(phset.map), "mapping not one-to-one..."
    g2p = ttslab.fromfile(G2P_FILE)
    #load
    try:
        pronundict = PronunciationDictionary()
        pronundict.fromtextfile(PRONUNDICT_INFN, phmap)
    except IOError:
        pronundict = load_simplepronundict(DICT_INFN, phmap)
    addendum = load_simplepronundict(ADDENDUM_INFN, phmap)
    #pre-predict from wordlist and add to addendum
    try:
        skipwords = set(list(pronundict) + list(addendum))
        addendum.update(prepredict(WORDLIST_INFN, g2p, skipwords))
    except IOError: