def run(self):
        """Convert the corpus audio and dump a corpus pointing at the results.

        Loads the corpus from ``self.corpus_file`` and builds a second corpus
        with the same name/speaker metadata. Each recording keeps its
        segments, but its audio path is rewritten to live in
        ``self.audio_folder`` (with the extension replaced by
        ``self.output_format`` when that is not ``None``).
        ``self.perform_ffmpeg`` is then mapped over the *source* recordings in
        a pool of 4 worker processes, and the new corpus is written to
        ``self.out``.
        """
        c = corpus.Corpus()
        nc = corpus.Corpus()

        c.load(tk.uncached_path(self.corpus_file))
        nc.name = c.name
        nc.speakers = c.speakers
        nc.default_speaker = c.default_speaker
        nc.speaker_name = c.speaker_name

        # The target folder is the same for every recording.
        audio_folder = tk.uncached_path(self.audio_folder)

        for r in c.recordings:
            nr = corpus.Recording()
            nr.name = r.name
            # Segment objects are shared with the source corpus, not copied.
            nr.segments = r.segments
            nr.speaker_name = r.speaker_name
            nr.speakers = r.speakers
            nr.default_speaker = r.default_speaker

            audio_name = os.path.basename(r.audio)
            if self.output_format is not None:
                # Swap the extension for the requested output format.
                stem = os.path.splitext(audio_name)[0]
                audio_name = stem + "." + self.output_format

            nr.audio = os.path.join(audio_folder, audio_name)
            nc.add_recording(nr)

        from multiprocessing import pool

        # The context manager tears the workers down once map() has returned
        # (or raised), so no worker processes are left behind.
        with pool.Pool(4) as p:
            p.map(self.perform_ffmpeg, c.recordings)

        nc.dump(tk.uncached_path(self.out))
    def run(self):
        """Merge all corpora in ``self.corpora`` into one corpus file.

        The merged corpus is named ``self.name``. When ``self.subcorpora`` is
        truthy each loaded corpus is attached as a sub-corpus; otherwise all
        of its recordings are added directly to the merged corpus. The result
        is dumped to ``self.merged_corpus``.
        """
        target = corpus.Corpus()
        target.name = self.name

        for path in self.corpora:
            loaded = corpus.Corpus()
            loaded.load(str(path))

            if not self.subcorpora:
                # Flatten: take over every recording of the loaded corpus.
                for recording in loaded.all_recordings():
                    target.add_recording(recording)
                continue

            target.add_subcorpus(loaded)

        target.dump(tk.uncached_path(self.merged_corpus))
    def run(self):
        """Pack the corpus into a zip archive together with a text index.

        Loads the corpus from ``self.corpus_file`` (which must not contain
        sub-corpora) and writes one index line per non-empty recording to
        ``<self.name>.txt``. Unless ``self.no_audio`` is set, the recording's
        audio file is also stored in the archive under ``<self.name>/``.
        The index file itself is added to the archive at the end.

        If ``self.segment_file_path`` is set, only segments whose full name
        (``corpus/recording/segment``) is listed in that file are exported;
        the file may be plain text or gzip-compressed (name ending in "gz").

        Raises:
            AssertionError: if the corpus has sub-corpora or a non-empty
                recording does not contain exactly one segment.
        """
        import zipfile

        c = corpus.Corpus()
        c.load(tk.uncached_path(self.corpus_file))
        # Only meaningful after loading: the loop below walks c.recordings
        # only, so anything inside a sub-corpus would be silently dropped.
        assert len(c.subcorpora) == 0

        # Optional whitelist of full segment names; a set keeps the
        # per-recording membership test O(1).
        segments = None
        if self.segment_file_path:
            segment_path = tk.uncached_path(self.segment_file_path)
            opener = gzip.open if segment_path.endswith("gz") else open
            # Text mode for both variants, so lines are str either way.
            with opener(segment_path, "rt", encoding="utf-8") as segment_file:
                segments = {line.strip() for line in segment_file}

        dict_file_path = self.name + ".txt"
        with zipfile.ZipFile(tk.uncached_path(self.out),
                             mode='w',
                             compression=zipfile.ZIP_STORED) as zip_file:
            with open(dict_file_path, "wt") as dict_file:
                dict_file.write("[\n")

                for recording in c.recordings:
                    # skip empty recordings
                    if not recording.segments:
                        continue

                    # validate that each recording only contains one segment
                    assert len(recording.segments) == 1
                    segment = recording.segments[0]  # type:corpus.Segment

                    # Filtering always uses the full name, regardless of
                    # which name ends up in the index.
                    segment_name = "/".join(
                        [c.name, recording.name, segment.name])
                    if segments is not None and segment_name not in segments:
                        continue

                    if not self.use_full_seq_name:
                        segment_name = segment.name

                    text = segment.orth.replace('"', '\\"')

                    if self.no_audio:
                        dict_file.write(
                            '{"duration": %f, "text": "%s", "seq_name": "%s"},\n'
                            % (segment.end, text, segment_name))
                    else:
                        audio_path = recording.audio
                        audio_name = os.path.basename(audio_path)
                        arc_path = os.path.join(self.name, audio_name)
                        zip_file.write(audio_path, arcname=arc_path)
                        dict_file.write(
                            '{"file": "%s", "duration": %f, "text": "%s", "seq_name": "%s"},\n'
                            % (audio_name, segment.end, text, segment_name))

                dict_file.write(']\n')

            # The index must be closed (flushed) before it is archived.
            zip_file.write(dict_file_path, dict_file_path)
    def run(self):
        """Copy orthographies from one corpus into another by segment name.

        Reads the orth of every (single-segment) recording in
        ``self.bliss_corpus``, keyed by segment name, then assigns the
        matching orth to every (single-segment) recording of
        ``self.empty_bliss_corpus`` and dumps that corpus to ``self.out``.

        Raises:
            AssertionError: if a recording in either corpus does not have
                exactly one segment.
            KeyError: if a segment name of the target corpus is missing from
                the source corpus.
        """
        source = corpus.Corpus()
        source.load(tk.uncached_path(self.bliss_corpus))

        # segment name -> orthography, collected from the source corpus
        orth_by_name = {}
        for rec in source.all_recordings():
            assert len(rec.segments) == 1, \
                "needs to be a single segment recording"
            only_segment = rec.segments[0]
            orth_by_name[only_segment.name] = only_segment.orth

        target = corpus.Corpus()
        target.load(tk.uncached_path(self.empty_bliss_corpus))

        for rec in target.all_recordings():
            assert len(rec.segments) == 1, \
                "needs to be a single segment recording"
            only_segment = rec.segments[0]
            only_segment.orth = orth_by_name[only_segment.name]

        target.dump(tk.uncached_path(self.out))
    def run(self):
        """Distribute the corpus' segment names over several segment files.

        Loads the corpus from ``self.corpus_path``, splits the list of all
        segments with ``chunks(..., self.num_segments)`` and writes the
        ``idx``-th part to ``self.segment_files[idx]``, one name per line.
        ``self.use_fullname`` selects ``segment.fullname()`` over
        ``segment.name``.
        """
        loaded = corpus.Corpus()
        loaded.load(tk.uncached_path(self.corpus_path))

        segment_list = list(loaded.segments())
        parts = chunks(segment_list, self.num_segments)

        for index, part in enumerate(parts):
            target_path = self.segment_files[index].get_path()
            with open(target_path, 'wt') as out:
                for seg in part:
                    label = seg.fullname() if self.use_fullname else seg.name
                    out.write(label + '\n')
    def run(self):
        """Reset each segment's end time to the real length of its audio.

        Loads ``self.bliss_corpus``, reads every recording's audio file with
        ``soundfile`` and sets the end of its single segment to
        ``number_of_samples / sample_rate``. The old and new values are
        printed, and the updated corpus is dumped to ``self.out``.

        Raises:
            AssertionError: if a recording does not have exactly one segment.
        """
        import soundfile
        c = corpus.Corpus()
        c.load(tk.uncached_path(self.bliss_corpus))

        for r in c.all_recordings():
            assert len(
                r.segments) == 1, "needs to be a single segment recording"
            segment = r.segments[0]
            old_duration = segment.end

            # Close the handle right after reading; otherwise one descriptor
            # per recording stays open until garbage collection.
            with open(r.audio, "rb") as audio_file:
                data, sample_rate = soundfile.read(audio_file)

            new_duration = len(data) / sample_rate
            print("%s: %f vs. %f" %
                  (segment.name, old_duration, new_duration))
            segment.end = new_duration

        c.dump(tk.uncached_path(self.out))
Example #7
0
    def __init__(self, out_folder, out_corpus, sample_rate, window_shift,
                 window_size, n_fft, iterations, preemphasis, file_format,
                 corpus_format):
        """Store the conversion settings and prepare output bookkeeping.

        All arguments except ``out_corpus`` are stored on the instance under
        their own names. When ``corpus_format`` is ``"bliss"``, ``out_corpus``
        is kept as ``self.corpus_path`` and a fresh bliss corpus named
        ``"GRIFFIN_LIM"`` is created as ``self.corpus``; for any other format
        neither attribute is set. A temporary directory is always created and
        its path kept in ``self.tmp_path``.
        """
        # Plain settings, kept verbatim.
        self.out_folder = out_folder
        self.sample_rate = sample_rate
        self.window_shift = window_shift
        self.window_size = window_size
        self.n_fft = n_fft
        self.iterations = iterations
        self.preemphasis = preemphasis
        self.file_format = file_format
        self.corpus_format = corpus_format

        wants_bliss = self.corpus_format == "bliss"
        if wants_bliss:
            # Only the bliss format gets a corpus object to fill.
            self.corpus_path = out_corpus
            self.corpus = bliss_corpus.Corpus()
            self.corpus.name = "GRIFFIN_LIM"

        scratch_dir = tempfile.mkdtemp(prefix=gs.TMP_PREFIX)
        self.tmp_path = scratch_dir
    def run(self):
        """Check channel count and sample rate of every recording's audio.

        Loads ``self.bliss_corpus`` and reads each recording's audio with
        ``soundfile``. A file passes if its sample rate equals
        ``self.sample_rate`` and its data is one-dimensional when
        ``self.channels == 1``, or has ``self.channels`` columns otherwise.
        Every failure (including unreadable files) is printed and appended to
        the log at ``self.out``; processing continues with the next file.

        Raises:
            AssertionError: after all files were checked, if any failed.
        """
        import soundfile

        c = corpus.Corpus()
        c.load(tk.uncached_path(self.bliss_corpus))

        success = True

        # The context manager makes sure the log is flushed and closed
        # before the final assertion can abort the job.
        with open(tk.uncached_path(self.out), "wt") as out_file:
            for r in c.all_recordings():
                try:
                    with open(r.audio, "rb") as audio_file:
                        audio, sr = soundfile.read(audio_file)
                    # Messages matter here: they are the only detail that
                    # ends up in the log for a failed check.
                    if self.channels == 1:
                        assert len(audio.shape) == 1, (
                            "expected mono audio, got data of shape %s" %
                            (audio.shape, ))
                    else:
                        assert audio.shape[1] == self.channels, (
                            "expected %s channels, got %s" %
                            (self.channels, audio.shape[1]))
                    assert sr == self.sample_rate, (
                        "expected sample rate %s, got %s" %
                        (self.sample_rate, sr))
                except Exception as e:
                    # Deliberately broad: any problem with a file is logged
                    # and the remaining files are still checked.
                    print("error in file %s: %s" % (r.audio, str(e)))
                    out_file.write("error in file %s: %s\n" %
                                   (r.audio, str(e)))
                    success = False

        assert success, "there was an error, please see error.log"
    def run(self):
        """Assign speaker embeddings from an HDF file to the corpus segments.

        Reads the ``inputs``, ``seqTags`` and ``seqLengths`` datasets from
        ``self.speaker_embedding_hdf`` and cuts ``inputs`` into one feature
        block per tag, using the first entry of each length row. The blocks
        and tags are kept in ``self.speaker_embedding_features`` and
        ``self.speaker_embedding_tags``. It then loads ``self.bliss_corpus``
        (no sub-corpora allowed) into ``self.c`` and dispatches on
        ``self.options['mode']``: ``"random"`` or ``"length_buckets"``.
        Output goes through a ``SimpleHDFWriter`` on ``self.out``.

        Raises:
            AssertionError: if the corpus has sub-corpora or the mode is
                not one of the supported values.
        """
        self.speaker_embedding_features = []
        self.speaker_embedding_tags = []

        # Everything needed is copied out inside the block, so the input
        # file can be closed before the (possibly long) assignment runs.
        with h5py.File(tk.uncached_path(self.speaker_embedding_hdf),
                       'r') as speaker_embedding_data:
            speaker_embedding_inputs = speaker_embedding_data['inputs']
            speaker_embedding_raw_tags = speaker_embedding_data['seqTags']
            speaker_embedding_lengths = speaker_embedding_data['seqLengths']

            offset = 0
            for tag, length in zip(speaker_embedding_raw_tags,
                                   speaker_embedding_lengths):
                self.speaker_embedding_features.append(
                    speaker_embedding_inputs[offset:offset + length[0]])
                # Tags may come back as bytes or str.
                self.speaker_embedding_tags.append(
                    tag.decode() if isinstance(tag, bytes) else tag)
                offset += length[0]

        self.hdf_writer = SimpleHDFWriter(
            tk.uncached_path(self.out),
            dim=self.speaker_embedding_features[0].shape[-1])

        self.c = corpus.Corpus()
        self.c.load(tk.uncached_path(self.bliss_corpus))
        assert len(self.c.subcorpora) == 0

        mode = self.options.get('mode')
        if mode == "random":
            self._random()
        elif mode == "length_buckets":
            self._random_matching_length()
        else:
            # Explicit raise: keeps the exception type of the former
            # `assert False`, but names the bad value and survives `-O`.
            raise AssertionError(
                "unknown mode %r, expected 'random' or 'length_buckets'" %
                (mode, ))

        self.hdf_writer.close()
Example #10
0
    def _random_matching_length(self):
        """Give each segment a random embedding from a similar-length text.

        The corpus at ``self.options['corpus']`` supplies a text length
        (``len(orth)``) per segment name. The embeddings in
        ``self.speaker_embedding_features`` are sorted into buckets of width
        ``self.options['bucket_size']`` according to the text length looked
        up for their tag, and every bucket is shuffled. For each recording
        of ``self.c`` the nearest non-empty bucket to its own text length is
        searched (higher bucket first on ties), and embeddings are taken
        from that bucket in round-robin order and written to
        ``self.hdf_writer``.

        Raises:
            AssertionError: if the text corpus has sub-corpora or any
                recording does not have exactly one segment.
            KeyError: if an embedding tag has no entry in the text corpus.
            IndexError: if no bucket contains any embedding.
        """
        text_corpus = corpus.Corpus()
        text_corpus.load(tk.uncached_path(self.options['corpus']))
        # Only meaningful after loading: the loop below reads
        # text_corpus.recordings only.
        assert len(text_corpus.subcorpora) == 0

        text_durations = {}

        max_duration = 0
        for recording in text_corpus.recordings:
            assert len(recording.segments) == 1
            segment = recording.segments[0]  # type:corpus.Segment
            # NOTE(review): the full name is built with self.c.name, not
            # text_corpus.name - confirm that this is intended.
            segment_name = "/".join(
                [self.c.name, recording.name, segment.name])
            if not self.use_full_seq_name:
                segment_name = segment.name
            seg_len = len(segment.orth)
            text_durations[segment_name] = seg_len
            max_duration = max(max_duration, seg_len)

        bucket_size = int(self.options['bucket_size'])
        buckets = [[]
                   for _ in range(0, max_duration + bucket_size, bucket_size)]
        # Per-bucket read position for the round-robin below.
        bucket_indices = [0] * len(buckets)

        # fill buckets
        for tag, feature in zip(self.speaker_embedding_tags,
                                self.speaker_embedding_features):
            buckets[text_durations[tag] // bucket_size].append(feature)

        # shuffle buckets
        for bucket in buckets:
            random.shuffle(bucket)

        for recording in self.c.recordings:
            assert len(recording.segments) == 1
            segment = recording.segments[0]  # type:corpus.Segment
            segment_name = "/".join(
                [self.c.name, recording.name, segment.name])
            if not self.use_full_seq_name:
                segment_name = segment.name

            # search for nearest target bucket; the wanted bucket may lie
            # beyond the last one if this text is longer than any text in
            # the reference corpus
            wanted_bucket = len(segment.orth) // bucket_size
            # Large enough to reach every existing bucket from the wanted
            # position, however far outside the bucket range it lies.
            search_radius = max(wanted_bucket + 1, len(buckets))
            target_bucket = None
            for i in range(search_radius):
                upper = wanted_bucket + i
                lower = wanted_bucket - i
                if 0 <= upper < len(buckets) and buckets[upper]:
                    target_bucket = upper
                    break
                if 0 <= lower < len(buckets) and buckets[lower]:
                    target_bucket = lower
                    break
            if target_bucket is None:
                raise IndexError(
                    "no speaker embedding available in any length bucket")

            speaker_embedding = buckets[target_bucket][
                bucket_indices[target_bucket]]
            self.hdf_writer.insert_batch(numpy.asarray([speaker_embedding]),
                                         [1], [segment_name])
            # Advance round-robin, wrapping at the end of the bucket.
            bucket_indices[target_bucket] += 1
            if bucket_indices[target_bucket] >= len(buckets[target_bucket]):
                bucket_indices[target_bucket] = 0