Example #1
0
    def setup_segments(self):
        """Write the corpus segments to the `segments` file of the output path

        Timestamps are always forced: even when each wav holds a
        single utterance, tstart/tstop are written in the segments
        file.

        """
        output_dir = self._output_path()
        segments_file = os.path.join(output_dir, 'segments')

        CorpusSaver.save_segments(
            self.corpus, segments_file, force_timestamps=True)
Example #2
0
    def save(self, path, no_wavs=False, copy_wavs=True, force=False):
        """Write the corpus into the directory `path`

        The actual writing is delegated to `CorpusSaver.save`. When
        `force` is True and `path` already exists, `path` is removed
        first (a warning is logged).

        :param str path: The output directory. It is expected not to
            exist yet, unless force=True is used to overwrite it.

        :param bool no_wavs: when True, do not save the wavs (i.e. do
            not write the wavs subdirectory in `path`)

        :param bool copy_wavs: when True, copy the wavs instead of
            creating symbolic links to them

        :param bool force: when True, overwrite `path` if it already
            exists

        :raise: OSError if force=False and `path` already exists

        """
        self.log.info('saving corpus to %s', path)

        if force:
            # only an explicit request allows us to wipe existing data
            if os.path.exists(path):
                self.log.warning('overwriting existing path: %s', path)
                utils.remove(path)

        CorpusSaver.save(
            self, path,
            no_wavs=no_wavs,
            copy_wavs=copy_wavs)
Example #3
0
    def setup_silences(self):
        """Create silence_phones.txt and optional_silence.txt in the local path

        The silence phones are saved from the corpus by `CorpusSaver`,
        whereas the optional silence file always contains the single
        line `SIL`.

        """
        local_dir = self._local_path()
        silences_file = os.path.join(local_dir, 'silence_phones.txt')
        optional_file = os.path.join(local_dir, 'optional_silence.txt')

        CorpusSaver.save_silences(self.corpus, silences_file)

        with open_utf8(optional_file, 'w') as stream:
            stream.write(u'SIL\n')
Example #4
0
    def setup_utt2spk(self):
        """Create the utt2spk and spk2utt files in the output path

        utt2spk is written by `CorpusSaver.save_utt2spk`. spk2utt is
        written here, one line per speaker in sorted order, each line
        being the speaker followed by its sorted utterances, all
        separated by single spaces.

        """
        output_dir = self._output_path()
        CorpusSaver.save_utt2spk(
            self.corpus, os.path.join(output_dir, 'utt2spk'))

        # create spk2utt
        target = os.path.join(output_dir, 'spk2utt')
        with open_utf8(target, 'w') as out:
            # items() is available on both Python 2 and Python 3
            # dicts, whereas iteritems() exists on Python 2 only
            for spk, utt in sorted(self.corpus.spk2utt().items()):
                out.write(u'{} {}\n'.format(spk, ' '.join(sorted(utt))))
Example #5
0
 def setup_lexicon(self):
     """Save the corpus lexicon to lexicon.txt in the local path

     Return the path to the written lexicon file.

     """
     lexicon_file = os.path.join(self._local_path(), 'lexicon.txt')

     CorpusSaver.save_lexicon(self.corpus, lexicon_file)
     return lexicon_file
Example #6
0
 def setup_wav_folder(self):
     """Save the corpus wavs in the `wavs` folder of the recipe directory

     NOTE(review): symbolic links are presumably used here to avoid
     copying voluminous data, this is up to `CorpusSaver.save_wavs`
     -- to be confirmed.

     """
     wavs_dir = os.path.join(self.recipe_dir, 'wavs')

     CorpusSaver.save_wavs(self.corpus, wavs_dir)
Example #7
0
 def setup_text(self):
     """Save the corpus text to the `text` file of the output path

     Return the path to the written text file.

     """
     text_file = os.path.join(self._output_path(), 'text')

     CorpusSaver.save_text(self.corpus, text_file)
     return text_file
Example #8
0
 def setup_variants(self):
     """Save the corpus variants to extra_questions.txt in the local path"""
     variants_file = os.path.join(
         self._local_path(), 'extra_questions.txt')

     CorpusSaver.save_variants(self.corpus, variants_file)
Example #9
0
 def setup_segments(self):
     """Write the corpus segments to the `segments` file of the output path

     Nothing is written unless the corpus has several utterances
     per wav, i.e. unless starts and stops are specified in the
     segments.

     """
     segments_file = os.path.join(self._output_path(), 'segments')

     if not self.corpus.has_several_utts_per_wav():
         # a single utterance per wav: no segments file needed
         return

     CorpusSaver.save_segments(self.corpus, segments_file)