def setup_segments(self):
    """Write the ``segments`` file into the output directory.

    The file is saved as ``<self._output_path()>/segments`` through
    ``CorpusSaver.save_segments``, called with ``force_timestamps=True``.

    """
    segments_file = os.path.join(self._output_path(), 'segments')

    # Timestamps are forced: tstart/tstop go into the segments file
    # even when there is only one utterance per wav.
    CorpusSaver.save_segments(
        self.corpus, segments_file, force_timestamps=True)
def save(self, path, no_wavs=False, copy_wavs=True, force=False):
    """Save the corpus to the directory `path`.

    :param str path: The output directory. It is assumed not to exist
      yet (or use force=True to overwrite it).

    :param bool no_wavs: when True, don't save the wavs (i.e. don't
      write the wavs subdir in `path`)

    :param bool copy_wavs: when True, make a copy of the wavs
      instead of symbolic links

    :param bool force: when True, overwrite `path` if it already
      exists

    :raise: OSError if force=False and `path` already exists

    """
    self.log.info('saving corpus to %s', path)

    # An existing output directory is removed only on explicit request.
    overwrite_existing = force and os.path.exists(path)
    if overwrite_existing:
        self.log.warning('overwriting existing path: %s', path)
        utils.remove(path)

    CorpusSaver.save(
        self, path,
        no_wavs=no_wavs,
        copy_wavs=copy_wavs)
def setup_silences(self):
    """Create silence_phones.txt and optional_silence.txt.

    Both files are written into the directory returned by
    ``self._local_path()``. ``silence_phones.txt`` is produced by
    ``CorpusSaver.save_silences``; ``optional_silence.txt`` contains
    the single line ``SIL``.

    """
    local_dir = self._local_path()

    silence_phones = os.path.join(local_dir, 'silence_phones.txt')
    CorpusSaver.save_silences(self.corpus, silence_phones)

    optional_silence = os.path.join(local_dir, 'optional_silence.txt')
    with open_utf8(optional_silence, 'w') as stream:
        stream.write(u'SIL\n')
def setup_utt2spk(self):
    """Create utt2spk and spk2utt in the output directory.

    ``utt2spk`` is written by ``CorpusSaver.save_utt2spk``.
    ``spk2utt`` is written here from ``self.corpus.spk2utt()``: one
    line per speaker, sorted by speaker, each holding the speaker
    followed by its sorted utterances, all separated by single spaces.

    """
    target = os.path.join(self._output_path(), 'utt2spk')
    CorpusSaver.save_utt2spk(self.corpus, target)

    # create spk2utt
    target = os.path.join(self._output_path(), 'spk2utt')
    with open_utf8(target, 'w') as out:
        # items() rather than iteritems(): the latter exists only on
        # Python 2 dicts and raises AttributeError on Python 3. The
        # result goes straight into sorted(), so output is unchanged.
        for spk, utts in sorted(self.corpus.spk2utt().items()):
            out.write(u'{} {}\n'.format(spk, ' '.join(sorted(utts))))
def setup_lexicon(self):
    """Write lexicon.txt into the local directory.

    The file is saved as ``<self._local_path()>/lexicon.txt`` through
    ``CorpusSaver.save_lexicon``.

    :return: the path to the lexicon file

    """
    lexicon_file = os.path.join(self._local_path(), 'lexicon.txt')
    CorpusSaver.save_lexicon(self.corpus, lexicon_file)
    return lexicon_file
def setup_wav_folder(self):
    """Populate the ``wavs`` folder of the recipe directory.

    The corpus wavs are saved to ``<self.recipe_dir>/wavs`` through
    ``CorpusSaver.save_wavs``.

    NOTE(review): the previous docstring stated that symbolic links
    are used to avoid copying voluminous data; that depends on the
    defaults of ``CorpusSaver.save_wavs`` -- confirm.

    """
    wavs_dir = os.path.join(self.recipe_dir, 'wavs')
    CorpusSaver.save_wavs(self.corpus, wavs_dir)
def setup_text(self):
    """Write the ``text`` file into the output directory.

    The file is saved as ``<self._output_path()>/text`` through
    ``CorpusSaver.save_text``.

    :return: the path to the text file

    """
    text_file = os.path.join(self._output_path(), 'text')
    CorpusSaver.save_text(self.corpus, text_file)
    return text_file
def setup_variants(self):
    """Write extra_questions.txt into the local directory.

    The file is saved as ``<self._local_path()>/extra_questions.txt``
    through ``CorpusSaver.save_variants``.

    """
    questions_file = os.path.join(
        self._local_path(), 'extra_questions.txt')
    CorpusSaver.save_variants(self.corpus, questions_file)
def setup_segments(self):
    """Write the ``segments`` file into the output directory, if needed.

    The file ``<self._output_path()>/segments`` is written through
    ``CorpusSaver.save_segments`` only when
    ``self.corpus.has_several_utts_per_wav()`` is true; otherwise
    nothing is written.

    """
    segments_file = os.path.join(self._output_path(), 'segments')

    # write only if starts and stops are specified in segments.txt
    if not self.corpus.has_several_utts_per_wav():
        return

    CorpusSaver.save_segments(self.corpus, segments_file)