def write_audio_tracks(self, inputaudio, units, diralign, silence=0.):
    """
    Write the first channel of an audio file into separated track files.
    The channel is re-sampled to 16000 Hz, 16 bits, before being split.

    @param inputaudio (str - IN) File name of the audio file.
    @param units (list - IN) List of tuples (start-time,end-time) of tracks.
    @param diralign (str - IN) Directory to write audio tracks.
    @param silence (float - IN) Duration of a silence to surround the tracks.

    """
    # Only channel number 0 of the input file is used.
    first_channel = autils.extract_audio_channel(inputaudio, 0)
    # 16000 is the target framerate; 2 is presumably the sample width in
    # bytes (i.e. 16 bits) -- confirm against autils.format_channel.
    formatted = autils.format_channel(first_channel, 16000, 2)

    # Track files are numbered from 1, in the order of the given units.
    for number, (begin, end) in enumerate(units, start=1):
        fragment = autils.extract_channel_fragment(formatted, begin, end, silence)
        filename = self._tracknames.audiofilename(diralign, number)
        autils.write_channel(filename, fragment)
def _fix_window_asr(self, fromtime, totime, fromtoken, totoken, channel, pronlist, toklist, diralign, N):
    """
    Fix asr result in a window.
    Return the list of anchors the ASR found in that window.

    @param fromtime (float - IN) Start time of the window in the channel.
    @param totime (float - IN) End time of the window in the channel.
    @param fromtoken (int - IN) Index of the first token of the window.
    @param totoken (int - IN) Index (excluded) of the last token of the window.
    @param channel (IN) Audio channel the window fragment is extracted from.
    @param pronlist (list - IN) Pronunciations, indexed like toklist.
    @param toklist (list - IN) Tokens of the transcription.
    @param diralign (str - IN) Directory in which the "asr" files are written.
    @param N (IN) Value given to both the aligner and the matching step.
    @return (list) Tuples (start-time, end-time, token-index), where
    token-index is an index in toklist.

    """
    # Same duration is used to extract the fragment and to adjust the
    # times of the result, so it is bound once.
    padding = 0.2

    # create audio file
    resultname = os.path.join(diralign, "asr")
    audioname = os.path.join(diralign, "asr.wav")
    fragment = autils.extract_channel_fragment(channel, fromtime, totime, padding)
    autils.write_channel(audioname, fragment)

    # call the ASR engine to recognize tokens of this track
    self._aligner.set_phones(" ".join(pronlist[fromtoken:totoken]))
    self._aligner.set_tokens(" ".join(toklist[fromtoken:totoken]))
    self._aligner.run_alignment(audioname, resultname, N)

    # get the tokens time-aligned by the ASR engine:
    # entries are (starttime,endtime,label,score)
    wordalign = self._alignerio.read_aligned(resultname)[1]
    # presumably maps the times of the fragment back to the times of the
    # whole channel -- confirm against _adjust_asr_result
    wordalign = self._adjust_asr_result(wordalign, fromtime, padding)

    # ignore the last word: we can't know if the word is entire or was cut
    if len(wordalign) > 3:
        wordalign.pop()

    # list of tokens manually transcribed (the reference)
    reference = list(toklist[fromtoken:totoken])
    # list of tokens the ASR automatically time-aligned (the hypothesis)
    hypothesis = [(label, score) for (_start, _end, label, score) in wordalign]

    # Find matching tokens: the anchors.
    # In each match, item 0 indexes the reference and item 1 the hypothesis.
    matches = self._fix_matchings_list(reference, hypothesis, N)

    # The reference index is relative to the window: shift it by fromtoken
    # to get an index in toklist.
    return [
        (wordalign[match[1]][0], wordalign[match[1]][1], fromtoken + match[0])
        for match in matches
    ]