def make_joincoefs(featconfig, wav_dir):
    """Extract MCEPs for every wave file and write normalised join coefficients.

    Steps performed:
      1. Create MCEP_DIR and JOIN_DIR under the current working directory
         (os.mkdir raises if either already exists).
      2. Run extract_mceps once per wave file found in wav_dir, using the
         settings in the "SIG2FV_MCEP" section of featconfig.
      3. Load all MCEP tracks and all F0 tracks, normalise each set with its
         global mean and standard deviation, and append the F0 values as
         extra column(s) of the corresponding MCEP track.
      4. Save each combined track to JOIN_DIR with extension JOIN_EXT.

    featconfig -- config object exposing get(section, option)
    wav_dir    -- directory searched for "*.<WAV_EXT>" files
    """
    cwd = os.getcwd()
    mcep_dir = os.path.join(cwd, MCEP_DIR)
    os.mkdir(mcep_dir)
    join_dir = os.path.join(cwd, JOIN_DIR)
    os.mkdir(join_dir)
    pm_dir = os.path.join(cwd, PM_DIR)
    f0_dir = os.path.join(cwd, F0_DIR)

    fbank_order = featconfig.get("SIG2FV_MCEP", "FBANK_ORDER")
    melcep_order = featconfig.get("SIG2FV_MCEP", "MELCEP_ORDER")
    melcep_coefs = featconfig.get("SIG2FV_MCEP", "MELCEP_COEFS")
    preemph_coef = featconfig.get("SIG2FV_MCEP", "PREEMPH_COEF")
    window_factor = featconfig.get("SIG2FV_MCEP", "WINDOW_FACTOR")
    window_type = featconfig.get("SIG2FV_MCEP", "WINDOW_TYPE")

    print("MAKING JOINCOEFS...")
    # Use an explicit loop instead of map(): extract_mceps is called purely
    # for its side effects, and on Python 3 map() returns a lazy iterator
    # that would never be consumed, so nothing would be extracted.
    wavfilenames = sorted(glob(os.path.join(wav_dir, ".".join(["*", WAV_EXT]))))
    for wavfilename in wavfilenames:
        extract_mceps((wavfilename, fbank_order, window_factor, preemph_coef,
                       melcep_order, window_type, melcep_coefs, mcep_dir,
                       pm_dir))

    print("NORMALISING AND JOINING F0 AND MCEPS...")
    # Normalising mceps and f0s: (x - mean) / (4 * std) * (upper - lower)
    upper = +1.0
    lower = -1.0

    mceptracks = {}
    for fn in glob(os.path.join(mcep_dir, ".".join(["*", MCEP_EXT]))):
        t = Track()
        t.load_track(fn)
        mceptracks[os.path.basename(fn)] = t
    allmcepvecs = np.concatenate(
        [mceptracks[tn].values for tn in sorted(mceptracks)])
    mcepmean = allmcepvecs.mean(0)
    mcepstd = allmcepvecs.std(0)
    for k in mceptracks:
        mceptracks[k].values = ((mceptracks[k].values - mcepmean)
                                / (4 * mcepstd) * (upper - lower))

    f0tracks = {}
    for fn in glob(os.path.join(f0_dir, ".".join(["*", F0_EXT]))):
        t = Track()
        t.load_track(fn)
        f0tracks[os.path.basename(fn)] = t
    # F0 statistics are taken over non-zero values only
    # (presumably zeros mark unvoiced frames -- TODO confirm).
    allf0vecs = np.concatenate(
        [f0tracks[tn].values[f0tracks[tn].values.nonzero()]
         for tn in sorted(f0tracks)])
    f0mean = allf0vecs.mean(0)
    f0std = allf0vecs.std(0)
    for k in f0tracks:
        f0tracks[k].values = ((f0tracks[k].values - f0mean)
                              / (4 * f0std) * (upper - lower))

    # Add f0 to mcep track.
    # NOTE(review): tracks are paired purely by sorted filename order; this
    # assumes both directories hold exactly the same set of basenames.
    for k1, k2 in zip(sorted(mceptracks), sorted(f0tracks)):
        mceptracks[k1].values = np.concatenate(
            (mceptracks[k1].values, f0tracks[k2].values), 1)

    for fn in mceptracks:
        basename = os.path.splitext(os.path.basename(fn))[0]
        ttslab.tofile(mceptracks[fn],
                      os.path.join(join_dir, basename + "." + JOIN_EXT))
def utt_mceps(utt, shift=0.005, remove_pau=False, resettimes=False):
    """Extract a feature track from an utterance's waveform.

    The waveform is written to a temporary directory, the SIG2FV command
    template is run on it through the shell, and the resulting feature file
    is loaded into a Track, which is returned.

    utt        -- utterance; utt["waveform"] must provide write(filename)
    shift      -- frame shift passed to the SIG2FV template (default 0.005)
    remove_pau -- if true, keep only frames that fall inside segments whose
                  name is not "pau"
    resettimes -- if true (and remove_pau is set), replace the remaining
                  frame times with 1*shift, 2*shift, ...
    """
    temppath = mkdtemp()
    try:
        #wavs
        wfn1 = os.path.join(temppath, "1." + WAV_EXT)
        utt["waveform"].write(wfn1)
        #feats
        ffn1 = os.path.join(temppath, "1." + FEAT_EXT)
        cmds = SIG2FV % {"inputfile": wfn1,
                         "outputfile": ffn1,
                         "shift": shift}
        #print(cmds)
        os.system(cmds)
        #tracks
        t1 = Track()
        t1.load_track(ffn1)
    finally:
        # Always clean up, even if writing, extraction or loading fails.
        shutil.rmtree(temppath)

    if remove_pau:
        u = deepcopy(utt)  # fill times on a copy; the caller's utt is untouched
        fill_startendtimes(u)
        keep_intervals = [(seg["start"], seg["end"])
                          for seg in u.gr("Segment")
                          if seg["name"] != "pau"]
        indices = t1.mask_indices(keep_intervals)
        t1.times = t1.times[indices]
        t1.values = t1.values[indices]
        # NOTE(review): resettimes is treated as a sub-option of remove_pau;
        # confirm against the original layout of this function.
        if resettimes:
            # Builtin float replaces the np.float alias, which has been
            # removed from NumPy; the resulting dtype is the same.
            t1.times = np.arange(1, len(t1.times) + 1, dtype=float) * shift
    return t1
def add_feats_to_utt(args):
    """Attach join coefficients, LPC coefficients and residuals to each unit.

    args is a single tuple (u, lpc_dir, joincoef_dir, f0_dir): the utterance
    and the directories holding its LPC/residual, join-coefficient and F0
    files, all named after u["file_id"].

    Units are aligned with words: every unit takes the start/end time of the
    word at the same position. The utterance is modified in place and also
    returned.
    """
    u, lpc_dir, joincoef_dir, f0_dir = args
    file_id = u["file_id"]
    print("Processing:", file_id)
    u.fill_startendtimes()

    # Copy word timings onto the units (names must agree pairwise).
    for unit, word in zip(u.gr("Unit"), u.gr("Word")):
        assert unit["name"] == word["name"]
        unit["start"], unit["end"] = word["start"], word["end"]

    # Load all tracks belonging to this utterance.
    lpc_base = os.path.join(lpc_dir, file_id)
    lpctrack = Track()
    lpctrack.load_track(lpc_base + "." + LPC_EXT)
    restrack = Track()
    restrack.load_wave(lpc_base + "." + RES_EXT)
    jointrack = ttslab.fromfile(
        os.path.join(joincoef_dir, file_id) + "." + JOIN_EXT)
    f0track = Track()
    f0track.load_track(os.path.join(f0_dir, file_id) + "." + F0_EXT)

    # Boundary times: start of the first unit, then the end of every unit.
    boundaries = []
    for unit in u.gr("Unit"):
        if not boundaries:
            boundaries.append(unit["start"])
        boundaries.append(unit["end"])

    # Translate each boundary time into track indices / join vectors.
    # (The F0 indices are computed but not used further in this function.)
    lpc_idx = []
    f0_idx = []
    join_vecs = []
    for when in boundaries:
        lpc_idx.append(lpctrack.index_at(when))
        f0_idx.append(f0track.index_at(when))
        join_vecs.append(jointrack.values[jointrack.index_at(when)])

    # Pitch period at each LPC frame: difference between consecutive frame
    # times, with 0.0 prepended so the first frame also gets a period.
    pitchperiod = np.diff(np.concatenate(([0.0], lpctrack.times)))

    units = u.get_relation("Unit").as_list()
    assert len(units) == len(lpc_idx) - 1

    # zip() stops at the shortest input, so pairing each list with its own
    # [1:] slice walks over consecutive (left, right) boundary pairs.
    pairs = zip(join_vecs, join_vecs[1:],
                lpc_idx, lpc_idx[1:],
                f0_idx, f0_idx[1:],
                units)
    for jc0, jc1, lti0, lti1, _fti0, _fti1, unit in pairs:
        unit["left-joincoef"] = jc0
        unit["right-joincoef"] = jc1
        unit["lpc-coefs"] = lpctrack.slice(lti0, lti1, copy=True)  # like python slicing
        unit["lpc-coefs"].starttime = (
            0.0 if lti0 == 0 else lpctrack.times[lti0 - 1])
        unit["lpc-coefs"].zero_starttime()
        # For windowfactor=2 (save only samples and assume 16kHz)
        # NOTE(review): both paddings use pitchperiod[lti0]; confirm that
        # pitchperiod[lti1] was not intended for the trailing one.
        res_from = restrack.index_at(lpctrack.times[lti0] - pitchperiod[lti0])
        res_to = restrack.index_at(lpctrack.times[lti1] + pitchperiod[lti0])
        unit["residuals"] = restrack.slice(res_from, res_to).values
    return u
def utt_distance(utt, utt2, method="dtw", metric="euclidean", sig2fv=SIG2FV, VI=None):
    """ Uses Trackfile class' distance measurements to compare utts...
        See docstring in tfuncs_analysis.py for more details...

        Both waveforms are written to a temporary directory, features are
        extracted from each by running the sig2fv command template through
        the shell, and the two resulting tracks are compared.

        utt, utt2 -- utterances; each ["waveform"] must provide write(filename)
        method, metric, VI -- passed through unchanged to Track.distances
        sig2fv    -- command template with %(inputfile)s and %(outputfile)s
                     placeholders (defaults to the module-level SIG2FV)

        Returns whatever Track.distances returns.
    """
    temppath = mkdtemp()
    try:
        #wavs
        wfn1 = os.path.join(temppath, "1." + WAV_EXT)
        wfn2 = os.path.join(temppath, "2." + WAV_EXT)
        utt["waveform"].write(wfn1)
        utt2["waveform"].write(wfn2)
        #feats
        ffn1 = os.path.join(temppath, "1." + FEAT_EXT)
        ffn2 = os.path.join(temppath, "2." + FEAT_EXT)
        # Use the sig2fv argument: previously the parameter was ignored and
        # the global SIG2FV template was always used.
        for wfn, ffn in ((wfn1, ffn1), (wfn2, ffn2)):
            cmds = sig2fv % {"inputfile": wfn, "outputfile": ffn}
            #print(cmds)
            os.system(cmds)
        #tracks
        t1 = Track()
        t1.load_track(ffn1)
        t2 = Track()
        t2.load_track(ffn2)
        #compare
        t3 = t1.distances(t2, method=method, metric=metric, VI=VI)
    finally:
        # Remove the temporary files even if any step above raised.
        shutil.rmtree(temppath)
    return t3
def add_feats_to_utt(args):
    """Attach join coefficients, LPC coefficients, residuals and durations
    to each halfphone unit of an utterance.

    args is a single tuple (u, lpc_dir, joincoef_dir, f0_dir): the utterance
    and the directories holding its LPC/residual, join-coefficient and F0
    files, all named after u["file_id"].

    Every segment is split in two at seg["cl_end"] when present, otherwise
    at its midpoint. The first half of the first segment and the second half
    of the last segment are not assigned to any unit (presumably the leading
    and trailing pause halfphones -- TODO confirm). The utterance is modified
    in place and also returned.
    """
    u, lpc_dir, joincoef_dir, f0_dir = args
    file_id = u["file_id"]
    print("Processing:", file_id)
    u.fill_startendtimes()

    lpctrack = Track()
    lpctrack.load_track(".".join([os.path.join(lpc_dir, file_id), LPC_EXT]))
    restrack = Track()
    restrack.load_wave(".".join([os.path.join(lpc_dir, file_id), RES_EXT]))
    jointrack = ttslab.fromfile(
        ".".join([os.path.join(joincoef_dir, file_id), JOIN_EXT]))
    f0track = Track()
    f0track.load_track(".".join([os.path.join(f0_dir, file_id), F0_EXT]))

    #get boundarytimes: [start, split, end] per segment
    boundarytimes = []
    durations = []
    starttime = 0.0
    for seg in u.get_relation("Segment"):
        endtime = float(seg["end"])
        if "cl_end" in seg:
            splittime = float(seg["cl_end"])
        else:
            #TODO: should still add 25% split if diphthong...
            splittime = (endtime + starttime) / 2
        boundarytimes.append([starttime, splittime, endtime])
        durations.extend([splittime - starttime, endtime - splittime])
        starttime = endtime

    #convert boundtimes into sample ranges (and flatten):
    lpcsampleranges = []
    f0sampleranges = []
    joinsamples = []
    #DEMITASSE: pruning pau halfphones -- drop the first and last halfphone.
    #(Without pruning, every segment would contribute both its start and
    # split time, plus one final boundary at the end of the tracks.)
    durations = durations[1:-1]
    for i, bounds in enumerate(boundarytimes):
        # First segment: only its split point; later segments: start + split.
        times = bounds[1:2] if i == 0 else bounds[:2]
        for t in times:
            lpcsampleranges.append(lpctrack.index_at(t))
            f0sampleranges.append(f0track.index_at(t))
            # Convert the time to a frame index before indexing: the values
            # array cannot be indexed directly with a float time in seconds.
            joinsamples.append(jointrack.values[jointrack.index_at(t)])

    #get pitchperiods at lpc indices
    lpctimes = np.concatenate(([0.0], lpctrack.times))
    pitchperiod = np.diff(lpctimes)

    units = u.get_relation("Unit").as_list()
    assert len(units) == len(lpcsampleranges) - 1
    # The F0 index pairs are carried along but not used further below.
    for jc0, jc1, lti0, lti1, fti0, fti1, dur, i in zip(
            joinsamples[:-1], joinsamples[1:],
            lpcsampleranges[:-1], lpcsampleranges[1:],
            f0sampleranges[:-1], f0sampleranges[1:],
            durations, units):
        i["left-joincoef"] = jc0
        i["right-joincoef"] = jc1
        i["lpc-coefs"] = lpctrack.slice(lti0, lti1, copy=True)  #like python indexing/slicing
        if lti0 == 0:
            i["lpc-coefs"].starttime = 0.0
        else:
            i["lpc-coefs"].starttime = lpctrack.times[lti0 - 1]
        i["lpc-coefs"].zero_starttime()
        i["dur"] = dur
        #For windowfactor=2 (save only samples and assume 16kHz)
        # NOTE(review): both paddings use pitchperiod[lti0]; confirm that
        # pitchperiod[lti1] was not intended for the trailing one.
        i["residuals"] = restrack.slice(
            restrack.index_at(lpctrack.times[lti0] - pitchperiod[lti0]),
            restrack.index_at(lpctrack.times[lti1] + pitchperiod[lti0])).values
    return u
def make_joincoefs(featconfig, wav_dir):
    """Extract MCEPs for every wave file and write normalised join coefficients.

    Steps performed:
      1. Create MCEP_DIR and JOIN_DIR under the current working directory
         (os.mkdir raises if either already exists).
      2. Run extract_mceps once per wave file found in wav_dir, using the
         settings in the "SIG2FV_MCEP" section of featconfig.
      3. Load all MCEP tracks and all F0 tracks, normalise each set with its
         global mean and standard deviation, and append the F0 values as
         extra column(s) of the corresponding MCEP track.
      4. Save each combined track to JOIN_DIR with extension JOIN_EXT.

    featconfig -- config object exposing get(section, option)
    wav_dir    -- directory searched for "*.<WAV_EXT>" files
    """
    cwd = os.getcwd()
    mcep_dir = os.path.join(cwd, MCEP_DIR)
    os.mkdir(mcep_dir)
    join_dir = os.path.join(cwd, JOIN_DIR)
    os.mkdir(join_dir)
    pm_dir = os.path.join(cwd, PM_DIR)
    f0_dir = os.path.join(cwd, F0_DIR)

    fbank_order = featconfig.get("SIG2FV_MCEP", "FBANK_ORDER")
    melcep_order = featconfig.get("SIG2FV_MCEP", "MELCEP_ORDER")
    melcep_coefs = featconfig.get("SIG2FV_MCEP", "MELCEP_COEFS")
    preemph_coef = featconfig.get("SIG2FV_MCEP", "PREEMPH_COEF")
    window_factor = featconfig.get("SIG2FV_MCEP", "WINDOW_FACTOR")
    window_type = featconfig.get("SIG2FV_MCEP", "WINDOW_TYPE")

    print("MAKING JOINCOEFS...")
    # Use an explicit loop instead of map(): extract_mceps is called purely
    # for its side effects, and on Python 3 map() returns a lazy iterator
    # that would never be consumed, so nothing would be extracted.
    wavfilenames = sorted(glob(os.path.join(wav_dir, ".".join(["*", WAV_EXT]))))
    for wavfilename in wavfilenames:
        extract_mceps((wavfilename, fbank_order, window_factor, preemph_coef,
                       melcep_order, window_type, melcep_coefs, mcep_dir,
                       pm_dir))

    print("NORMALISING AND JOINING F0 AND MCEPS...")
    # Normalising mceps and f0s: (x - mean) / (4 * std) * (upper - lower)
    upper = +1.0
    lower = -1.0

    mceptracks = {}
    for fn in glob(os.path.join(mcep_dir, ".".join(["*", MCEP_EXT]))):
        t = Track()
        t.load_track(fn)
        mceptracks[os.path.basename(fn)] = t
    allmcepvecs = np.concatenate(
        [mceptracks[tn].values for tn in sorted(mceptracks)])
    mcepmean = allmcepvecs.mean(0)
    mcepstd = allmcepvecs.std(0)
    for k in mceptracks:
        mceptracks[k].values = ((mceptracks[k].values - mcepmean)
                                / (4 * mcepstd) * (upper - lower))

    f0tracks = {}
    for fn in glob(os.path.join(f0_dir, ".".join(["*", F0_EXT]))):
        t = Track()
        t.load_track(fn)
        f0tracks[os.path.basename(fn)] = t
    # F0 statistics are taken over non-zero values only
    # (presumably zeros mark unvoiced frames -- TODO confirm).
    allf0vecs = np.concatenate(
        [f0tracks[tn].values[f0tracks[tn].values.nonzero()]
         for tn in sorted(f0tracks)])
    f0mean = allf0vecs.mean(0)
    f0std = allf0vecs.std(0)
    for k in f0tracks:
        f0tracks[k].values = ((f0tracks[k].values - f0mean)
                              / (4 * f0std) * (upper - lower))

    # Add f0 to mcep track.
    # NOTE(review): tracks are paired purely by sorted filename order; this
    # assumes both directories hold exactly the same set of basenames.
    for k1, k2 in zip(sorted(mceptracks), sorted(f0tracks)):
        mceptracks[k1].values = np.concatenate(
            (mceptracks[k1].values, f0tracks[k2].values), 1)

    for fn in mceptracks:
        basename = os.path.splitext(os.path.basename(fn))[0]
        ttslab.tofile(mceptracks[fn],
                      os.path.join(join_dir, basename + "." + JOIN_EXT))
def add_feats_to_utt(args):
    """Attach join coefficients, LPC coefficients, residuals and durations
    to each halfphone unit of an utterance.

    args is a single tuple (u, lpc_dir, joincoef_dir, f0_dir): the utterance
    and the directories holding its LPC/residual, join-coefficient and F0
    files, all named after u["file_id"].

    Every segment is split in two at seg["cl_end"] when present, otherwise
    at its midpoint. The first half of the first segment and the second half
    of the last segment are not assigned to any unit (presumably the leading
    and trailing pause halfphones -- TODO confirm). The utterance is modified
    in place and also returned.
    """
    u, lpc_dir, joincoef_dir, f0_dir = args
    file_id = u["file_id"]
    print("Processing:", file_id)
    u.fill_startendtimes()

    lpctrack = Track()
    lpctrack.load_track(".".join([os.path.join(lpc_dir, file_id), LPC_EXT]))
    restrack = Track()
    restrack.load_wave(".".join([os.path.join(lpc_dir, file_id), RES_EXT]))
    jointrack = ttslab.fromfile(
        ".".join([os.path.join(joincoef_dir, file_id), JOIN_EXT]))
    f0track = Track()
    f0track.load_track(".".join([os.path.join(f0_dir, file_id), F0_EXT]))

    #get boundarytimes: [start, split, end] per segment
    boundarytimes = []
    durations = []
    starttime = 0.0
    for seg in u.get_relation("Segment"):
        endtime = float(seg["end"])
        if "cl_end" in seg:
            splittime = float(seg["cl_end"])
        else:
            #TODO: should still add 25% split if diphthong...
            splittime = (endtime + starttime) / 2
        boundarytimes.append([starttime, splittime, endtime])
        durations.extend([splittime - starttime, endtime - splittime])
        starttime = endtime

    #convert boundtimes into sample ranges (and flatten):
    lpcsampleranges = []
    f0sampleranges = []
    joinsamples = []
    #DEMITASSE: pruning pau halfphones -- drop the first and last halfphone.
    #(Without pruning, every segment would contribute both its start and
    # split time, plus one final boundary at the end of the tracks.)
    durations = durations[1:-1]
    for i, bounds in enumerate(boundarytimes):
        # First segment: only its split point; later segments: start + split.
        times = bounds[1:2] if i == 0 else bounds[:2]
        for t in times:
            lpcsampleranges.append(lpctrack.index_at(t))
            f0sampleranges.append(f0track.index_at(t))
            # Convert the time to a frame index before indexing: the values
            # array cannot be indexed directly with a float time in seconds.
            joinsamples.append(jointrack.values[jointrack.index_at(t)])

    #get pitchperiods at lpc indices
    lpctimes = np.concatenate(([0.0], lpctrack.times))
    pitchperiod = np.diff(lpctimes)

    units = u.get_relation("Unit").as_list()
    assert len(units) == len(lpcsampleranges) - 1
    # The F0 index pairs are carried along but not used further below.
    for jc0, jc1, lti0, lti1, fti0, fti1, dur, i in zip(
            joinsamples[:-1], joinsamples[1:],
            lpcsampleranges[:-1], lpcsampleranges[1:],
            f0sampleranges[:-1], f0sampleranges[1:],
            durations, units):
        i["left-joincoef"] = jc0
        i["right-joincoef"] = jc1
        i["lpc-coefs"] = lpctrack.slice(lti0, lti1, copy=True)  #like python indexing/slicing
        if lti0 == 0:
            i["lpc-coefs"].starttime = 0.0
        else:
            i["lpc-coefs"].starttime = lpctrack.times[lti0 - 1]
        i["lpc-coefs"].zero_starttime()
        i["dur"] = dur
        #For windowfactor=2 (save only samples and assume 16kHz)
        # NOTE(review): both paddings use pitchperiod[lti0]; confirm that
        # pitchperiod[lti1] was not intended for the trailing one.
        i["residuals"] = restrack.slice(
            restrack.index_at(lpctrack.times[lti0] - pitchperiod[lti0]),
            restrack.index_at(lpctrack.times[lti1] + pitchperiod[lti0])).values
    return u
def add_feats_to_utt(args):
    """Attach join coefficients, LPC coefficients and residuals to each unit.

    args is a single tuple (u, lpc_dir, joincoef_dir, f0_dir): the utterance
    and the directories holding its LPC/residual, join-coefficient and F0
    files, all named after u["file_id"].

    Units are aligned with words: every unit takes the start/end time of the
    word at the same position. The utterance is modified in place and also
    returned.
    """
    u, lpc_dir, joincoef_dir, f0_dir = args
    file_id = u["file_id"]
    print("Processing:", file_id)
    u.fill_startendtimes()

    # Copy word timings onto the units (names must agree pairwise).
    for unit, word in zip(u.gr("Unit"), u.gr("Word")):
        assert unit["name"] == word["name"]
        unit["start"], unit["end"] = word["start"], word["end"]

    # Load all tracks belonging to this utterance.
    lpc_base = os.path.join(lpc_dir, file_id)
    lpctrack = Track()
    lpctrack.load_track(lpc_base + "." + LPC_EXT)
    restrack = Track()
    restrack.load_wave(lpc_base + "." + RES_EXT)
    jointrack = ttslab.fromfile(
        os.path.join(joincoef_dir, file_id) + "." + JOIN_EXT)
    f0track = Track()
    f0track.load_track(os.path.join(f0_dir, file_id) + "." + F0_EXT)

    # Boundary times: start of the first unit, then the end of every unit.
    boundaries = []
    for unit in u.gr("Unit"):
        if not boundaries:
            boundaries.append(unit["start"])
        boundaries.append(unit["end"])

    # Translate each boundary time into track indices / join vectors.
    # (The F0 indices are computed but not used further in this function.)
    lpc_idx = []
    f0_idx = []
    join_vecs = []
    for when in boundaries:
        lpc_idx.append(lpctrack.index_at(when))
        f0_idx.append(f0track.index_at(when))
        join_vecs.append(jointrack.values[jointrack.index_at(when)])

    # Pitch period at each LPC frame: difference between consecutive frame
    # times, with 0.0 prepended so the first frame also gets a period.
    pitchperiod = np.diff(np.concatenate(([0.0], lpctrack.times)))

    units = u.get_relation("Unit").as_list()
    assert len(units) == len(lpc_idx) - 1

    # zip() stops at the shortest input, so pairing each list with its own
    # [1:] slice walks over consecutive (left, right) boundary pairs.
    pairs = zip(join_vecs, join_vecs[1:],
                lpc_idx, lpc_idx[1:],
                f0_idx, f0_idx[1:],
                units)
    for jc0, jc1, lti0, lti1, _fti0, _fti1, unit in pairs:
        unit["left-joincoef"] = jc0
        unit["right-joincoef"] = jc1
        unit["lpc-coefs"] = lpctrack.slice(lti0, lti1, copy=True)  # like python slicing
        unit["lpc-coefs"].starttime = (
            0.0 if lti0 == 0 else lpctrack.times[lti0 - 1])
        unit["lpc-coefs"].zero_starttime()
        # For windowfactor=2 (save only samples and assume 16kHz)
        # NOTE(review): both paddings use pitchperiod[lti0]; confirm that
        # pitchperiod[lti1] was not intended for the trailing one.
        res_from = restrack.index_at(lpctrack.times[lti0] - pitchperiod[lti0])
        res_to = restrack.index_at(lpctrack.times[lti1] + pitchperiod[lti0])
        unit["residuals"] = restrack.slice(res_from, res_to).values
    return u