def run(self):
    """Copy the corpus with audio paths rewritten into ``self.audio_folder``
    (optionally swapping the file extension to ``self.output_format``),
    convert all audio files in parallel via ``self.perform_ffmpeg``, and
    dump the new corpus to ``self.out``.
    """
    c = corpus.Corpus()
    nc = corpus.Corpus()

    c.load(tk.uncached_path(self.corpus_file))

    # Copy corpus-level metadata verbatim.
    nc.name = c.name
    nc.speakers = c.speakers
    nc.default_speaker = c.default_speaker
    nc.speaker_name = c.speaker_name

    for r in c.recordings:
        nr = corpus.Recording()
        nr.name = r.name
        nr.segments = r.segments
        nr.speaker_name = r.speaker_name
        nr.speakers = r.speakers
        nr.default_speaker = r.default_speaker

        audio_name = r.audio.split("/")[-1]
        if self.output_format is not None:
            # Replace the extension with the requested output format.
            name, ext = os.path.splitext(audio_name)
            audio_name = name + "." + self.output_format

        nr.audio = os.path.join(tk.uncached_path(self.audio_folder), audio_name)
        nc.add_recording(nr)

    from multiprocessing import pool

    # Use the pool as a context manager so the worker processes are torn
    # down after map() completes (the original never closed the pool).
    with pool.Pool(4) as p:
        p.map(self.perform_ffmpeg, c.recordings)

    nc.dump(tk.uncached_path(self.out))
def run(self):
    """Merge all corpora listed in ``self.corpora`` into one corpus named
    ``self.name`` and dump it to ``self.merged_corpus``.

    With ``self.subcorpora`` set, each input corpus is kept intact as a
    subcorpus; otherwise all recordings are flattened to the top level.
    """
    merged = corpus.Corpus()
    merged.name = self.name

    for corpus_path in self.corpora:
        part = corpus.Corpus()
        part.load(str(corpus_path))
        if self.subcorpora:
            # Preserve the input corpus as its own subcorpus.
            merged.add_subcorpus(part)
        else:
            # Flatten: lift every recording into the merged corpus.
            for recording in part.all_recordings():
                merged.add_recording(recording)

    merged.dump(tk.uncached_path(self.merged_corpus))
def run(self):
    """Pack corpus audio plus a RETURNN-style metadata dict into a zip.

    Writes one ``{"file"/"duration"/"text"/"seq_name"}`` entry per segment
    (audio omitted when ``self.no_audio`` is set), optionally filtered by a
    segment list file.
    """
    import zipfile

    zip_file = zipfile.ZipFile(tk.uncached_path(self.out), mode='w',
                               compression=zipfile.ZIP_STORED)
    dict_file_path = self.name + ".txt"
    dict_file = open(dict_file_path, "wt")
    dict_file.write("[\n")

    c = corpus.Corpus()
    c.load(tk.uncached_path(self.corpus_file))
    # Only flat corpora are supported. BUGFIX: the original asserted on the
    # freshly constructed (still empty) corpus *before* loading, which made
    # the check vacuous.
    assert len(c.subcorpora) == 0

    segments = None
    if self.segment_file_path:
        segment_path = tk.uncached_path(self.segment_file_path)
        if segment_path.endswith("gz"):
            segment_file = gzip.open(segment_path, "rb")
        else:
            # BUGFIX: the original opened ``self.segment_file`` (wrong
            # attribute) here instead of the segment file path.
            segment_file = open(segment_path, "rt")
        # BUGFIX: decode only byte lines; the text-mode branch yields str,
        # on which the original's unconditional .decode() would raise.
        segments = [
            line.decode().strip() if isinstance(line, bytes) else line.strip()
            for line in segment_file
        ]
        segment_file.close()

    for recording in c.recordings:
        # skip empty recordings
        if not recording.segments:
            continue
        # validate that each recording only contains one segment
        assert len(recording.segments) == 1
        segment = recording.segments[0]  # type:corpus.Segment
        segment_name = "/".join([c.name, recording.name, segment.name])
        if segments is not None and segment_name not in segments:
            continue
        if not self.use_full_seq_name:
            segment_name = segment.name
        if self.no_audio:
            dict_file.write(
                '{"duration": %f, "text": "%s", "seq_name": "%s"},\n'
                % (segment.end, segment.orth.replace('"', '\\"'),
                   segment_name))
        else:
            audio_path = recording.audio
            arc_path = os.path.join(self.name, os.path.basename(audio_path))
            zip_file.write(audio_path, arcname=arc_path)
            dict_file.write(
                '{"file": "%s", "duration": %f, "text": "%s", "seq_name": "%s"},\n'
                % (os.path.basename(audio_path), segment.end,
                   segment.orth.replace('"', '\\"'), segment_name))

    dict_file.write(']\n')
    dict_file.close()
    zip_file.write(dict_file_path, dict_file_path)
    zip_file.close()
def run(self):
    """Copy segment orthographies from ``self.bliss_corpus`` onto the
    segments of ``self.empty_bliss_corpus`` (matched by segment name) and
    dump the filled corpus to ``self.out``.
    """
    source = corpus.Corpus()
    source.load(tk.uncached_path(self.bliss_corpus))

    # Collect orth per segment name from the source corpus.
    orth_by_tag = {}
    for rec in source.all_recordings():
        assert len(rec.segments) == 1, "needs to be a single segment recording"
        orth_by_tag[rec.segments[0].name] = rec.segments[0].orth

    target = corpus.Corpus()
    target.load(tk.uncached_path(self.empty_bliss_corpus))
    for rec in target.all_recordings():
        assert len(rec.segments) == 1, "needs to be a single segment recording"
        rec.segments[0].orth = orth_by_tag[rec.segments[0].name]

    target.dump(tk.uncached_path(self.out))
def run(self):
    """Split all corpus segments into ``self.num_segments`` chunks and
    write one segment-name-per-line list file per chunk."""
    c = corpus.Corpus()
    c.load(tk.uncached_path(self.corpus_path))

    segment_list = list(c.segments())
    for idx, chunk in enumerate(chunks(segment_list, self.num_segments)):
        with open(self.segment_files[idx].get_path(), 'wt') as out:
            for seg in chunk:
                # Full name includes corpus/recording prefixes.
                name = seg.fullname() if self.use_fullname else seg.name
                out.write(name + '\n')
def run(self):
    """Overwrite each segment's end time with the duration measured from
    the actual audio file, then dump the corrected corpus to ``self.out``.
    """
    import soundfile

    c = corpus.Corpus()
    c.load(tk.uncached_path(self.bliss_corpus))
    for rec in c.all_recordings():
        assert len(rec.segments) == 1, "needs to be a single segment recording"
        seg = rec.segments[0]
        previous_end = seg.end
        samples, rate = soundfile.read(open(rec.audio, "rb"))
        measured = len(samples) / rate
        # Log the old vs. the measured duration for inspection.
        print("%s: %f vs. %f" % (seg.name, previous_end, measured))
        seg.end = measured
    c.dump(tk.uncached_path(self.out))
def __init__(self, out_folder, out_corpus, sample_rate, window_shift,
             window_size, n_fft, iterations, preemphasis, file_format,
             corpus_format):
    """Store Griffin-Lim reconstruction settings and prepare outputs.

    :param out_folder: output location for the reconstructed audio
    :param out_corpus: corpus output path; only stored when
        ``corpus_format == "bliss"`` (otherwise the argument is unused —
        NOTE(review): presumably intentional, confirm)
    :param sample_rate: audio sample rate
    :param window_shift: analysis window shift (presumably for the STFT —
        TODO confirm units, seconds vs. samples)
    :param window_size: analysis window size
    :param n_fft: FFT size
    :param iterations: number of Griffin-Lim iterations
    :param preemphasis: preemphasis factor
    :param file_format: audio file format for the written files
    :param corpus_format: ``"bliss"`` enables creation of a bliss corpus
    """
    self.out_folder = out_folder
    self.sample_rate = sample_rate
    self.window_shift = window_shift
    self.window_size = window_size
    self.n_fft = n_fft
    self.iterations = iterations
    self.preemphasis = preemphasis
    self.file_format = file_format
    self.corpus_format = corpus_format
    if self.corpus_format == "bliss":
        # Only the bliss path gets a corpus object; its recordings are
        # filled in elsewhere.
        self.corpus_path = out_corpus
        self.corpus = bliss_corpus.Corpus()
        self.corpus.name = "GRIFFIN_LIM"
    # Scratch directory for intermediate files.
    self.tmp_path = tempfile.mkdtemp(prefix=gs.TMP_PREFIX)
def run(self):
    """Verify every recording of the corpus: audio must be readable, have
    ``self.channels`` channels and sample rate ``self.sample_rate``.

    All failures are printed and logged to ``self.out``; the job fails via
    the final assert if any file was bad.
    """
    import soundfile

    c = corpus.Corpus()
    c.load(tk.uncached_path(self.bliss_corpus))

    success = True
    # Context managers so the log file and each audio handle are closed
    # deterministically (the original leaked both).
    with open(tk.uncached_path(self.out), "wt") as out_file:
        for r in c.all_recordings():
            try:
                with open(r.audio, "rb") as audio_file:
                    audio, sr = soundfile.read(audio_file)
                if self.channels == 1:
                    # Mono files are expected to be 1-D.
                    assert len(audio.shape) == 1
                else:
                    assert audio.shape[1] == self.channels
                assert sr == self.sample_rate
            except Exception as e:
                # Best-effort check: record the problem and keep scanning
                # the remaining recordings.
                print("error in file %s: %s" % (r.audio, str(e)))
                out_file.write("error in file %s: %s\n" % (r.audio, str(e)))
                success = False
    assert success, "there was an error, please see error.log"
def run(self):
    """Load per-sequence speaker embeddings from ``self.speaker_embedding_hdf``
    and write a new HDF assigning an embedding to every segment of
    ``self.bliss_corpus``, according to ``self.options['mode']``.
    """
    data = h5py.File(tk.uncached_path(self.speaker_embedding_hdf), 'r')
    inputs = data['inputs']
    raw_tags = data['seqTags']
    seq_lengths = data['seqLengths']

    # Slice the flat 'inputs' array into one feature block per sequence.
    self.speaker_embedding_features = []
    self.speaker_embedding_tags = []
    pos = 0
    for raw_tag, length in zip(raw_tags, seq_lengths):
        count = length[0]
        self.speaker_embedding_features.append(inputs[pos:pos + count])
        tag = raw_tag.decode() if isinstance(raw_tag, bytes) else raw_tag
        self.speaker_embedding_tags.append(tag)
        pos += count

    self.hdf_writer = SimpleHDFWriter(
        tk.uncached_path(self.out),
        dim=self.speaker_embedding_features[0].shape[-1])

    self.c = corpus.Corpus()
    self.c.load(tk.uncached_path(self.bliss_corpus))
    # Only flat corpora are supported.
    assert len(self.c.subcorpora) == 0

    # Dispatch on the configured assignment strategy.
    mode = self.options.get('mode')
    if mode == "random":
        self._random()
    elif mode == "length_buckets":
        self._random_matching_length()
    else:
        assert False

    self.hdf_writer.close()
def _random_matching_length(self):
    """Assign speaker embeddings to segments so that each segment receives
    an embedding whose source utterance has a similar orth length.

    Embeddings are grouped into buckets by orth character length (bucket
    width ``self.options['bucket_size']``), shuffled within each bucket,
    and handed out round-robin per bucket; a segment draws from the
    nearest non-empty bucket to its own orth-length bucket.
    """
    text_corpus = corpus.Corpus()
    # NOTE(review): this assert runs on the still-empty corpus object
    # *before* load(), so it can never fail — likely meant to go after
    # the load call.
    assert len(text_corpus.subcorpora) == 0
    text_corpus.load(tk.uncached_path(self.options['corpus']))

    # Orth length per segment name of the text corpus, and the maximum
    # length (defines how many buckets are needed).
    text_durations = {}
    max_duration = 0
    for recording in text_corpus.recordings:
        assert len(recording.segments) == 1
        segment = recording.segments[0]  # type:corpus.Segment
        segment_name = "/".join(
            [self.c.name, recording.name, segment.name])
        if not self.use_full_seq_name:
            segment_name = segment.name
        seg_len = len(segment.orth)
        text_durations[segment_name] = seg_len
        if seg_len > max_duration:
            max_duration = seg_len

    bucket_size = int(self.options['bucket_size'])
    buckets = [[] for i in
               range(0, max_duration + bucket_size, bucket_size)]
    # Per-bucket cursor for round-robin consumption.
    bucket_indices = [0] * len(buckets)
    # fill buckets
    for tag, feature in zip(self.speaker_embedding_tags,
                            self.speaker_embedding_features):
        buckets[text_durations[tag] // bucket_size].append(feature)
    # shuffle buckets
    for bucket in buckets:
        random.shuffle(bucket)

    for recording in self.c.recordings:
        assert len(recording.segments) == 1
        segment = recording.segments[0]  # type:corpus.Segment
        segment_name = "/".join(
            [self.c.name, recording.name, segment.name])
        if not self.use_full_seq_name:
            segment_name = segment.name
        # search for nearest target bucket: probe outward (+i, then -i)
        # from the segment's own length bucket until a non-empty one is
        # found.
        # NOTE(review): if no non-empty bucket exists within +/-1000,
        # target_bucket is left unchanged and the indexing below raises
        # IndexError on an empty bucket — confirm this cannot happen in
        # practice.
        target_bucket = len(segment.orth) // bucket_size
        for i in range(1000):
            if 0 <= target_bucket + i < len(buckets) and len(
                    buckets[target_bucket + i]) > 0:
                target_bucket = target_bucket + i
                break
            if 0 <= target_bucket - i < len(buckets) and len(
                    buckets[target_bucket - i]) > 0:
                target_bucket = target_bucket - i
                break
        speaker_embedding = buckets[target_bucket][
            bucket_indices[target_bucket]]
        self.hdf_writer.insert_batch(numpy.asarray([speaker_embedding]),
                                     [1], [segment_name])
        # Advance the cursor, wrapping around when the bucket is
        # exhausted (embeddings may be reused).
        bucket_indices[target_bucket] += 1
        if bucket_indices[target_bucket] >= len(buckets[target_bucket]):
            bucket_indices[target_bucket] = 0