def write_audio_tracks(self, inputaudio, units, diralign, silence=0.):
    """
    Write the first channel of an audio file into separated track files.
    Re-sample to 16000 Hz, 16 bits.

    @param inputaudio (str - IN) File name of the audio file.
    @param units (list - IN) List of tuples (start-time,end-time) of tracks.
    @param diralign (str - IN) Directory to write the audio tracks.
    @param silence (float - IN) Duration of a silence to surround the tracks.

    """
    # Extract the first channel and convert it to 16000 Hz, 16 bits
    channel = autils.extract_audio_channel(inputaudio, 0)
    channel = autils.format_channel(channel, 16000, 2)

    # Write one audio file for each unit, surrounded by a silence
    for track, u in enumerate(units):
        (s, e) = u
        trackchannel = autils.extract_channel_fragment(channel, s, e, silence)
        trackname = self._tracknames.audiofilename(diralign, track+1)
        autils.write_channel(trackname, trackchannel)
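# A minimal usage sketch (hedged: `aligner` stands for an instance of this
# class; the audio file name, the output directory and the resulting track
# file names are hypothetical and depend on self._tracknames):
#
#     units = [(0.0, 1.5), (2.1, 3.75)]   # (start-time, end-time) in seconds
#     aligner.write_audio_tracks("speech.wav", units, "/tmp/align", silence=0.2)
#     # one re-sampled track file is written into /tmp/align for each unit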
def create_chunks(self, inputaudio, phontier, toktier, diralign):
    """
    Create time-aligned tiers from raw input tiers.

    @param inputaudio (str - IN) File name of the audio file.
    @param phontier (Tier - IN) the tier with phonetization
    @param toktier (Tier - IN) the tier with tokenization to split
    @param diralign (str - IN) the directory to work in

    """
    trsoutput = Transcription("Chunks")

    # Extract the audio channel
    channel = autils.extract_audio_channel(inputaudio, 0)
    channel = autils.format_channel(channel, 16000, 2)

    # Extract the lists of tokens and their corresponding pronunciations
    pronlist = self._tier2raw(phontier, map=True).split()
    toklist = self._tier2raw(toktier, map=False).split()
    if len(pronlist) != len(toklist):
        raise IOError("Inconsistency between the number of items in "
                      "phonetization %d and tokenization %d." % (len(pronlist), len(toklist)))

    # As a first stage, we'll find anchors.
    anchortier = AnchorTier()
    anchortier.set_duration(channel.get_duration())
    anchortier.set_extdelay(1.)
    anchortier.set_outdelay(0.5)

    # Search silences and use them as anchors.
    if self.SILENCES is True:
        anchortier.append_silences(channel)

    # Estimate the speaking rate (average number of tokens per second)
    self._spkrate.eval_from_duration(channel.get_duration(), len(toklist))

    # Multi-pass ASR to find anchors
    A = -1      # number of anchors in the preceding pass
    N = self.N  # decreasing N-gram value
    W = self.W  # decreasing window length
    while A != anchortier.GetSize() and anchortier.check_holes_ntokens(self.NBT) is False:

        anchortier.set_windelay(W)
        A = anchortier.GetSize()

        logging.debug(" =========================================================== ")
        logging.debug(" Number of anchors: %d" % A)
        logging.debug(" N: %d" % N)
        logging.debug(" W: %d" % W)

        # Perform ASR and append new anchors in the anchor tier (if any)
        self._asr(toklist, pronlist, anchortier, channel, diralign, N)

        # Append the anchor tier as an intermediate result
        if self.ANCHORS is True and A != anchortier.GetSize():
            self._append_tier(anchortier, trsoutput)
            annotationdata.io.write(os.path.join(diralign, "ANCHORS-%d.xra" % anchortier.GetSize()), trsoutput)

        # Prepare the next pass
        W = max(W-1., self.WMIN)
        N = max(N-1, self.NMIN)

    # Finally, export the anchors as chunks.
    tiert = anchortier.export(toklist)
    tiert.SetName("Chunks-Tokenized")
    tierp = anchortier.export(pronlist)
    tierp.SetName("Chunks-Phonetized")
    trsoutput.Append(tiert)
    trsoutput.Append(tierp)

    return trsoutput
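# A minimal usage sketch (hedged: `chunker` stands for a configured instance
# of this class with N, W, NBT, WMIN and NMIN already set; the file and tier
# names are hypothetical, and Find() is assumed to return a tier by name):
#
#     trs = annotationdata.io.read("sample.xra")
#     phontier = trs.Find("Phonetization")
#     toktier = trs.Find("Tokenization")
#     chunks = chunker.create_chunks("sample.wav", phontier, toktier, "/tmp/align")
#     annotationdata.io.write("sample-chunks.xra", chunks)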