def test_stem_id():
    """Load each stem of the example file on its own and check its layout.

    The full file is read first to learn how many stems it holds; every
    stem is then requested individually through ``stem_id``.
    """
    all_stems, _ = stempeg.read_stems(stempeg.example_stem_path())
    stem_count = all_stems.shape[0]
    for stem_index in range(stem_count):
        single_stem, _ = stempeg.read_stems(
            stempeg.example_stem_path(),
            stem_id=stem_index,
        )
        # the last axis holds the channels and must have exactly two entries
        assert single_stem.shape[-1] == 2
        # a single stem must come back as a 2-D array
        assert single_stem.ndim == 2
def test_stem_id():
    """Read every stem of the track under ``tests/data`` separately.

    Each individually requested stem must be returned as a 2-D array.
    """
    stem_file = "tests/data/The Easton Ellises - Falcon 69.stem.mp4"
    all_stems, _ = stempeg.read_stems(stem_file)
    for stem_index in range(all_stems.shape[0]):
        single_stem, _ = stempeg.read_stems(stem_file, stem_id=stem_index)
        assert single_stem.ndim == 2
def test_duration(start, duration):
    """Read a ``(start, duration)`` excerpt of the example file and check its length.

    ``start`` and ``duration`` are forwarded unchanged to
    ``stempeg.read_stems``. Presumably they come from test parametrization
    defined outside this view.
    """
    fp = stempeg.example_stem_path()
    info = stempeg.Info(fp)
    if start:
        # only read when the offset is below the smallest entry of
        # info.duration_streams; the result is not inspected in this branch
        if start < min(info.duration_streams):
            S, _ = stempeg.read_stems(fp, start=start, duration=duration)
    # NOTE(review): the collapsed source does not show which ``if`` this
    # ``else`` belongs to; pairing it with the outer ``if start`` is assumed
    # here - confirm against the original file.
    else:
        S, rate = stempeg.read_stems(fp, start=start, duration=duration)
        if duration is not None:
            # the sample count on axis 1 must equal duration * rate
            assert S.shape[1] == duration * rate
def test_ffmpeg_format(format, path):
    """Decode ``path`` with two ffmpeg sample formats and compare the results.

    The file is read once with ``ffmpeg_format="s16le"`` and once with
    ``ffmpeg_format="f32le"``, both times requesting ``np.float32`` output;
    the two arrays must be close according to ``np.allclose``.
    The ``format`` argument is not used by the body.
    """

    def _decode(pcm_format):
        # one read per requested ffmpeg sample format; the rate is ignored
        samples, _ = stempeg.read_stems(
            path,
            dtype=np.float32,
            ffmpeg_format=pcm_format,
        )
        return samples

    from_int = _decode("s16le")
    from_float = _decode("f32le")
    assert np.allclose(from_int, from_float)
def load_audio(self, path, stem_id, chunk_start=0, chunk_duration=None):
    """array_like: [shape=(num_samples, num_channels)]

    Load audio from ``path``, store the sample rate reported by the reader
    in ``self._rate`` and return the samples.

    Non-wav tracks are read with ``stempeg.read_stems``. Wav tracks are read
    with ``sf.read``, which takes sample offsets, so ``chunk_start`` and
    ``chunk_duration`` are multiplied by ``self.rate`` first.

    Raises
    ------
    ValueError
        If ``self.path`` does not exist. ``self._rate`` and ``self._audio``
        are reset to ``None`` before raising.
    """
    # NOTE(review): existence is checked on self.path while the data is read
    # from ``path`` - confirm the two are always the same file.
    if not os.path.exists(self.path):
        self._rate = None
        self._audio = None
        raise ValueError("Oops! %s cannot be loaded" % path)

    if self.is_wav:
        chunk_start = int(chunk_start * self.rate)
        if chunk_duration:
            # soundfile's ``stop`` is a sample index, not a time in seconds
            last_sample = chunk_start + int(chunk_duration * self.rate)
        else:
            # no (or zero) duration: pass the value through unchanged
            last_sample = chunk_duration
        audio, rate = sf.read(
            path,
            always_2d=True,
            start=chunk_start,
            stop=last_sample,
        )
    else:
        # read using stempeg
        audio, rate = stempeg.read_stems(
            filename=path,
            stem_id=stem_id,
            start=chunk_start,
            duration=chunk_duration,
            info=self.info,
        )

    self._rate = rate
    return audio
def load_audio(
    self, path, stem_id, chunk_start=0, chunk_duration=None, sample_rate=None
):
    """array_like: [shape=(num_samples, num_channels)]

    Read audio from ``path`` through ``stempeg.read_stems``, store the
    returned rate in ``self._rate`` and return the samples. For wav tracks
    the requested ``stem_id`` is replaced by ``0``.

    Raises
    ------
    ValueError
        If ``self.path`` does not exist. ``self._rate`` and ``self._audio``
        are reset to ``None`` before raising.
    """
    if not os.path.exists(self.path):
        self._rate = None
        self._audio = None
        raise ValueError("Oops! %s cannot be loaded" % path)

    # wav files are always read with stem index 0
    selected_stem = 0 if self.is_wav else stem_id
    audio, rate = stempeg.read_stems(
        filename=path,
        stem_id=selected_stem,
        start=chunk_start,
        duration=chunk_duration,
        info=self.info,
        sample_rate=sample_rate,
        ffmpeg_format="s16le",
    )
    self._rate = rate
    return audio
def stems(self):
    """array_like: [shape=(stems, num_samples, num_channels)]

    Return ``self._stems`` when it has been set. Otherwise build the array:
    non-wav tracks whose file exists are decoded with
    ``stempeg.read_stems``; in every other case ``self.audio`` is stacked
    with the audio of each source, ordered by ``stem_id``. The rate used is
    stored in ``self._rate``.
    """
    # return the cached stems if they were explicitly set by the setter
    cached = self._stems
    if cached is not None:
        return cached

    # otherwise read from disk to save RAM
    if not self.is_wav and os.path.exists(self.path):
        stacked, rate = stempeg.read_stems(
            filename=self.path,
            start=self.chunk_start,
            duration=self.chunk_duration,
            info=self.info,
        )
    else:
        rate = self.rate
        tracks = [self.audio]
        # append sources in order of their stem ids
        tracks.extend(
            source.audio
            for source in sorted(
                self.sources.values(), key=lambda source: source.stem_id
            )
        )
        stacked = np.array(tracks)

    self._rate = rate
    return stacked
def test_nistems():
    """Write the example stems with ``NIStemsWriter`` and verify the metadata.

    The written file is passed to MP4Box with ``-dump-udta 0:stem``; the
    dump is expected next to the temporary file as ``<name>_stem.udta``.
    Its JSON content must match the file named by
    ``stempeg.default_metadata()`` once both are normalised by ``ordered``.
    """
    mp4exc = stempeg.cmds.find_cmd("MP4Box")
    stems, rate = stempeg.read_stems(stempeg.example_stem_path())

    with tmp.NamedTemporaryFile(delete=False, suffix='.m4a') as tempfile:
        stempeg.write_stems(
            tempfile.name,
            stems,
            sample_rate=rate,
            writer=stempeg.NIStemsWriter()
        )

        callArgs = [mp4exc]
        callArgs.extend(["-dump-udta", "0:stem", tempfile.name])
        sp.check_call(callArgs)

        root, _ = os.path.splitext(tempfile.name)
        udtaFile = root + "_stem.udta"

        with open(stempeg.default_metadata()) as f:
            d_metadata = json.load(f)

        try:
            # The context manager closes the handle even when parsing fails;
            # previously the file opened here was never closed.
            with codecs.open(udtaFile, encoding="utf-8") as fileObj:
                # skip the first 8 positions of the dump before parsing
                fileObj.seek(8)
                l_metadata = json.load(fileObj)
        except json.decoder.JSONDecodeError:
            # fall back to parsing the dump from its very beginning
            with open(udtaFile) as json_file:
                l_metadata = json.load(json_file)

        assert ordered(l_metadata) == ordered(d_metadata)
def test_shape():
    """Check the array layout of the track under ``tests/data``.

    Expected: 5 entries on axis 0, more than 200000 samples on axis 1 in a
    multiple of 1024, and 2 entries on axis 2.
    """
    stem_file = "tests/data/The Easton Ellises - Falcon 69.stem.mp4"
    loaded, _ = stempeg.read_stems(stem_file)
    stem_count, sample_count, channel_count = (
        loaded.shape[0], loaded.shape[1], loaded.shape[2]
    )
    assert stem_count == 5
    assert ((sample_count % 1024) == 0 and sample_count > 200000)
    assert channel_count == 2
def process_file(filename, record_path, filenum):
    """Resample one stem file and store its fragments in a TFRecord file.

    The output is ``<record_path>/<filenum>.tfrecord`` (ZLIB compressed).
    The decoded audio is resampled with ``resample_helper`` and then cut
    into windows of ``FRAGMENT_LENGTH`` samples taken every
    ``FRAGMENT_OFFSET`` samples; a trailing window that would run past the
    end is dropped. Each window is serialised via ``write_segment``.

    The writer is now closed in a ``finally`` clause, so a failure while
    decoding, resampling or serialising no longer leaks the open file.
    """
    options = tf.python_io.TFRecordOptions(
        tf.python_io.TFRecordCompressionType.ZLIB,
        compression_strategy=zlib.Z_HUFFMAN_ONLY,
        mem_level=9,
        compression_level=3)
    writer = tf.python_io.TFRecordWriter(
        os.path.join(record_path, str(filenum) + '.tfrecord'), options)
    try:
        gc.collect()
        S, rate = stempeg.read_stems(filename, np.float32)
        print("File has %d samples" % S.shape[1])

        Sr = resample_helper(S, rate)
        samples = Sr.shape[1]
        print("After resample: File has %d samples" % samples)

        for i in range(0, samples, FRAGMENT_OFFSET):
            # skip the final window when it would exceed the signal length
            if i + FRAGMENT_LENGTH <= samples:
                # Work around https://github.com/faroit/stempeg/issues/8
                # NOTE(review): ``rate`` is the rate reported for the
                # original file, not for the resampled data - confirm that
                # write_segment expects this value.
                example = write_segment(
                    Sr[:, i:i + FRAGMENT_LENGTH].astype(np.float32), rate)
                writer.write(example.SerializeToString())
    finally:
        writer.close()
def test_shape(nb_samples):
    """Write random stems with ``StreamsWriter`` and check the shape read back.

    A random array of shape ``(5, nb_samples, 2)`` is written to
    ``./random.stem.m4a``. After reading, axes 0 and 2 must match the
    input and the length of axis 1 must be a multiple of 1024.
    """
    out_path = "./random.stem.m4a"
    random_stems = np.random.random((5, nb_samples, 2))
    stempeg.write_stems(
        out_path,
        random_stems,
        writer=stempeg.StreamsWriter(),
    )
    loaded, _ = stempeg.read_stems(out_path)
    assert loaded.shape[0] == random_stems.shape[0]
    assert loaded.shape[2] == random_stems.shape[2]
    assert loaded.shape[1] % 1024 == 0
def test_channels(audio, multichannel_format):
    """Round-trip 1-D audio through a temporary file and compare shapes.

    Inputs that are not one-dimensional are not checked. The temporary
    file gets ``multichannel_format`` as its extension, is written with
    ``stempeg.write_audio`` at 44100 and read back with
    ``stempeg.read_stems``.
    """
    if audio.ndim != 1:
        # only one-dimensional input is exercised by this test
        return

    extension = '.' + multichannel_format
    with tmp.NamedTemporaryFile(delete=False, suffix=extension) as tempfile:
        stempeg.write_audio(tempfile.name, audio, sample_rate=44100)
        loaded_audio, _ = stempeg.read_stems(tempfile.name)
        assert audio.shape == loaded_audio.shape
def test_shape(nb_samples):
    """Write random stems to ``./random.stem.mp4`` and check the shape read back.

    Axes 0 and 2 of the loaded array must match the ``(5, nb_samples, 2)``
    input and the length of axis 1 must be a multiple of 1024.
    """
    out_path = "./random.stem.mp4"
    random_stems = np.random.random((5, nb_samples, 2))
    # NOTE(review): the audio array is passed before the path here - confirm
    # this argument order matches the installed stempeg version.
    stempeg.write_stems(random_stems, out_path)
    loaded, _ = stempeg.read_stems(out_path)
    assert loaded.shape[0] == random_stems.shape[0]
    assert loaded.shape[2] == random_stems.shape[2]
    assert loaded.shape[1] % 1024 == 0
def main():
    """Store the backing-track STFT magnitude of every stem file as HDF5.

    For each non-hidden ``*.stem.mp4`` file in ``config.wav_dir_mus`` the
    stems at indices 1, 2 and 3 are summed, clipped, averaged over the two
    channels when the result is 2-D, and transformed with ``utils.stft``.
    The magnitude is written as dataset ``back_stft`` to
    ``config.backing_dir + 'mus_' + <track name> + '.hdf5'``.

    The HDF5 file is opened with a context manager so it is closed even
    when writing the dataset fails.
    """
    wav_files = [
        x for x in os.listdir(config.wav_dir_mus)
        if x.endswith('.stem.mp4') and not x.startswith(".")
    ]

    for count, lf in enumerate(wav_files, start=1):
        audio, fs = stempeg.read_stems(
            os.path.join(config.wav_dir_mus, lf), stem_id=[0, 1, 2, 3, 4])

        drums = audio[1]
        bass = audio[2]
        acc = audio[3]

        # NOTE(review): clipping to [0.0, 1.0] discards every negative
        # sample of the summed signal - confirm this range is intended.
        backing = np.clip(drums + bass + acc, 0.0, 1.0)
        if len(backing.shape) == 2:
            # average the two channels into a single one
            backing = (backing[:, 0] + backing[:, 1]) / 2

        back_stft = abs(utils.stft(backing))

        # lf[:-9] removes the 9-character ".stem.mp4" suffix
        out_name = config.backing_dir + 'mus_' + lf[:-9] + '.hdf5'
        with h5py.File(out_name, mode='w') as hdf5_file:
            hdf5_file.create_dataset("back_stft", back_stft.shape, np.float32)
            hdf5_file["back_stft"][:, :] = back_stft

        utils.progress(count, len(wav_files))
def load_data(path):
    """Walk the sub-folders of ``path`` and decode every non-hidden file.

    Each folder name is split on ``"_"``; its first two parts are printed
    as gun name and recording method. Every file whose name does not start
    with a dot is read with ``stempeg.read_stems``. The decoded audio is
    not kept and nothing is returned.
    """
    for folder in os.listdir(path):
        name_parts = folder.split('_')
        gun_name, recording_method = name_parts[0], name_parts[1]
        print(gun_name, recording_method)

        for file in os.listdir(f'{path}/{folder}'):
            print(file)
            if file[0] == ".":
                # hidden entry: nothing to decode
                continue
            file_path = f'{path}/{folder}/{file}'
            print(file_path)
            _stems, _rate = stempeg.read_stems(file_path)
def test_multichannel_containers(audio, nb_channels, multichannel_format):
    """Round-trip audio through ``ChannelsWriter`` and ``ChannelsReader``.

    The audio is written at 44100 to a temporary file whose extension is
    ``multichannel_format``. It is read back with ``always_3d=True`` and a
    reader configured with ``nb_channels``; the shapes must be equal.
    """
    extension = '.' + multichannel_format
    with tmp.NamedTemporaryFile(delete=False, suffix=extension) as tempfile:
        stempeg.write_stems(
            tempfile.name,
            audio,
            sample_rate=44100,
            writer=ChannelsWriter(),
        )
        channel_reader = stempeg.ChannelsReader(nb_channels=nb_channels)
        loaded_audio, _ = stempeg.read_stems(
            tempfile.name,
            always_3d=True,
            reader=channel_reader,
        )
        assert audio.shape == loaded_audio.shape
def convert_to_wav(dir_path, wav_dir_path):
    """
    Converts the compressed stem files to individual wav files for the
    mixture and its sources.

    For every file directly inside ``<dir_path>/test`` and
    ``<dir_path>/train`` a folder named after the track is created under
    the matching sub-folder of ``wav_dir_path``. It receives
    ``mixture.wav`` (stem 0), ``vocals.wav`` (stem 4) and
    ``accompaniment.wav`` (sum of stems 1, 2 and 3).

    Output folders are created with ``os.makedirs(..., exist_ok=True)``, so
    missing parent directories are created as well and an already existing
    folder is not an error.

    Parameters
    ----------
    dir_path: str
        Path of the stems directory
    wav_dir_path: str
        Path to store the wav files
    """
    os.makedirs(wav_dir_path, exist_ok=True)

    # iterating through sets
    for sub_set in ['test', 'train']:
        sub_dir = os.path.join(dir_path, sub_set)
        wav_sub_dir = os.path.join(wav_dir_path, sub_set)
        os.makedirs(wav_sub_dir, exist_ok=True)

        # only the files directly inside the subset folder are processed
        _, _, files = next(os.walk(sub_dir))
        print("Working with subset ", sub_dir)

        # iterating through files
        for i, f in enumerate(files):
            # "&" and "'" are replaced in the folder name
            title = f.split(".stem.mp4")[0].replace("&", "_").replace("'", "_")
            print(i, ": " + title)
            wav_track_path = os.path.join(wav_sub_dir, title)
            os.makedirs(wav_track_path, exist_ok=True)

            # reading stems
            track_path = os.path.join(sub_dir, f)
            stems, sr = stempeg.read_stems(track_path)

            # saving wav files
            sp.write(path=os.path.join(wav_track_path, "mixture.wav"),
                     data=stems[0], sr=sr)
            sp.write(path=os.path.join(wav_track_path, "vocals.wav"),
                     data=stems[4], sr=sr)

            # generating accompaniment from the remaining stems
            acc_list = [
                stems[source] for source in [1, 2, 3]
                if stems[source] is not None
            ]
            accompaniment = np.sum(np.array(acc_list), axis=0)
            sp.write(path=os.path.join(wav_track_path, "accompaniment.wav"),
                     data=accompaniment, sr=sr)
def stems(self):
    """array_like: [shape=(stems, num_samples, num_channels)]

    Return ``self._stems`` when it has been set. Otherwise decode
    ``self.path`` with ``stempeg.read_stems``, store the returned rate in
    ``self._rate`` and return the decoded array. When the file does not
    exist, ``None`` is returned.
    """
    # return the cached stems if they were explicitly set by the setter
    cached = self._stems
    if cached is not None:
        return cached

    if not os.path.exists(self.path):
        # nothing on disk to read
        return None

    # read from disk to save RAM
    loaded, rate = stempeg.read_stems(filename=self.path)
    self._rate = rate
    return loaded
def _resamp(stempeg_filename):
    """Convert one stem file into mono wav files at ``target_sr``.

    The file is read from ``in_dir``. One wav per entry of ``stem_labels``
    is written to ``<out_dir>/<stub>/<label>.wav``, where ``<stub>`` is the
    file name without ``.stem.mp4`` and without spaces. Each stem is
    reduced to mono by summing over axis 1 before resampling.

    The sample rates are passed to ``librosa.resample`` by keyword; recent
    librosa releases reject them as positional arguments.
    """
    stempeg_path = os.path.join(in_dir, stempeg_filename)
    stempeg_filename_stub = (
        stempeg_filename.replace('.stem.mp4', '').replace(' ', '')
    )
    output_wav_dir = os.path.join(out_dir, stempeg_filename_stub)
    os.makedirs(output_wav_dir, exist_ok=True)

    # read stempeg format
    stem, input_sr = stempeg.read_stems(stempeg_path)

    for i, name in enumerate(stem_labels):
        wav = np.sum(stem[i, ...], axis=1)  # sum to mono
        wav = librosa.resample(wav, orig_sr=input_sr, target_sr=target_sr)
        out_path = os.path.join(output_wav_dir, f'{name}.wav')
        sf.write(out_path, wav, target_sr)
def separate(
        self, audio_or_filename: Union[np.ndarray, str]) -> Tuple[str, float]:
    """Run the separation model on an audio array or an audio file.

    A string argument is treated as a file name: names ending in
    ``.stem.mp4`` are decoded with ``stempeg.read_stems`` (stem 0), any
    other name with ``sf.read``; the rate reported by the reader is stored
    in ``self.samplerate``. Any other argument is used as the audio
    directly, and ``self.samplerate`` is left untouched.

    Returns the result of ``self.model.separate_audio_noop`` when
    ``self.test_mode`` is truthy, otherwise of
    ``self.model.separate_audio``.
    """
    # NOTE(review): the return annotation says Tuple[str, float], but the
    # value is whatever the model method returns - confirm the annotation.
    if not isinstance(audio_or_filename, str):
        audio = audio_or_filename
    elif audio_or_filename.endswith(".stem.mp4"):
        audio, self.samplerate = stempeg.read_stems(
            filename=audio_or_filename, stem_id=0)
    else:
        audio, self.samplerate = sf.read(audio_or_filename)

    if self.test_mode:
        run_model = self.model.separate_audio_noop
    else:
        run_model = self.model.separate_audio
    return run_model(audio)
def rate(self):
    """int: sample rate in Hz

    Return ``self._rate`` when it is already known. Otherwise the file at
    ``self.path`` is read (``sf.read`` for wav tracks,
    ``stempeg.read_stems`` otherwise) and the rate it reports is stored in
    ``self._rate`` and returned.

    Raises
    ------
    ValueError
        If the rate is unknown and ``self.path`` does not exist.
    """
    if self._rate is not None:
        return self._rate

    if not os.path.exists(self.path):
        self._rate = None
        self._audio = None
        raise ValueError("Oops! %s cannot be loaded" % self.path)

    # the audio is read only to learn its rate; the samples are discarded
    if self.is_wav:
        _, detected_rate = sf.read(self.path, always_2d=True)
    else:
        _, detected_rate = stempeg.read_stems(
            filename=self.path,
            stem_id=self.stem_id,
        )
    self._rate = detected_rate
    return detected_rate
def audio(self):
    """array_like: [shape=(num_samples, num_channels)]

    Return ``self._audio`` when it has been set. Otherwise the file at
    ``self.path`` is read (``sf.read`` for wav tracks,
    ``stempeg.read_stems`` otherwise), the reported rate is stored in
    ``self._rate`` and the samples are returned without being cached.

    Raises
    ------
    ValueError
        If no audio is cached and ``self.path`` does not exist.
    """
    # return the cached audio if it was explicitly set by the setter
    cached = self._audio
    if cached is not None:
        return cached

    if not os.path.exists(self.path):
        self._rate = None
        self._audio = None
        raise ValueError("Oops! %s cannot be loaded" % self.path)

    # read from disk to save RAM
    if self.is_wav:
        samples, rate = sf.read(self.path, always_2d=True)
    else:
        samples, rate = stempeg.read_stems(
            filename=self.path,
            stem_id=self.stem_id,
        )
    self._rate = rate
    return samples
def test_multistream_containers(audio, multistream_format, nb_stems):
    """Round-trip multi-stem audio through ``StreamsWriter`` (aac codec).

    Nothing is checked unless ``nb_stems`` is greater than 1. The stems are
    named ``"0"``, ``"1"``, ... and written at 44100 to a temporary file
    with ``multistream_format`` as extension. The shape read back with
    ``always_3d=True`` must equal the input shape. For ``"m4a"`` the stream
    titles reported by ``stempeg.Info`` are also compared with the names
    that were written.
    """
    if not nb_stems > 1:
        return

    extension = '.' + multistream_format
    with tmp.NamedTemporaryFile(delete=False, suffix=extension) as tempfile:
        stem_names = [str(index) for index in range(nb_stems)]
        streams_writer = stempeg.StreamsWriter(
            codec='aac',
            stem_names=stem_names,
        )
        stempeg.write_stems(
            tempfile.name,
            audio,
            sample_rate=44100,
            writer=streams_writer,
        )
        loaded_audio, _ = stempeg.read_stems(tempfile.name, always_3d=True)
        assert audio.shape == loaded_audio.shape

        if multistream_format == "m4a":
            info = stempeg.Info(tempfile.name)
            loaded_stem_names = info.title_streams
            # check if titles could be extracted
            assert all(
                written == loaded
                for written, loaded in zip(stem_names, loaded_stem_names)
            )
def _process(self, file):
    """Cut stems 1-4 of ``file`` into clips, write them as wav and yield metadata.

    The number of clips per stem is
    ``int(num_samples / (self.sample_len * rate)) - 2``. Clip ``n`` covers
    ``n * self.sample_len`` to ``(n + 1) * self.sample_len``; both bounds
    are multiplied by the rate to index the samples. Only channel 0 is
    kept. Each clip is written to ``self.output_path`` and described by a
    dict that also carries the result of ``self._is_silence``.
    """
    audio, rate = stempeg.read_stems(file)
    total_samples = audio.shape[1]
    clip_count = int(total_samples / (self.sample_len * rate))
    origin_name = os.path.basename(file)

    for stem in range(1, 5):
        for clip_index in range(clip_count - 2):
            start = clip_index * self.sample_len
            end = (clip_index + 1) * self.sample_len
            # bounds are scaled by the rate to obtain sample indices
            clip = audio[stem, start * rate:end * rate, 0]

            clip_name = "{}_{}-{}_{}.wav".format(origin_name, start, end, stem)
            clip_path = os.path.join(self.output_path, clip_name)
            sf.write(clip_path, clip, rate)

            yield {
                "fname": clip_name,
                "start": start,
                "end": end,
                "category": stem,
                "origin_fname": origin_name,
                "is_silence": self._is_silence(clip)
            }
# Load reference stems and the matching source estimates for evaluation.
# Usage: <script> SOURCE_DIR TARGET_DIR
#   SOURCE_DIR holds the *.stem.mp4 reference files,
#   TARGET_DIR holds vocals/<track>.npz and accompaniment/<track>.npz.
vocal_SAR = []
ctr = 0
source_path = sys.argv[1]
target_path = sys.argv[2]
print('Directory', target_path)
# NOTE(review): the working directory changes here, so a relative
# target_path is resolved against source_path afterwards - confirm intended.
os.chdir(source_path)
dirs = os.listdir()
for file in dirs:
    print(file)
    if (file.endswith('stem.mp4')):
        # reference source extraction
        ctr += 1
        ys_stereo, fs = stempeg.read_stems(file, stem_id=0)
        # sample rates are passed by keyword: recent librosa releases
        # reject them as positional arguments
        yus = librosa.resample(
            np.transpose(ys_stereo), orig_sr=fs, target_sr=22050)
        # average the two channels into a mono signal
        ys = (yus[0, :] + yus[1, :]) / 2
        yr_vocals_stereo, fs = stempeg.read_stems(file, stem_id=4)
        yur_vocals = librosa.resample(
            np.transpose(yr_vocals_stereo), orig_sr=fs, target_sr=22050)
        yr_vocals = (yur_vocals[0, :] + yur_vocals[1, :]) / 2
        # reference accompaniment is the mixture minus the vocals
        yr_accomp = ys - yr_vocals
        # loading source estimates; file[:-9] removes ".stem.mp4".
        # The archives are closed as soon as the array has been read.
        with np.load(target_path + '/vocals/' + file[:-9] + '.npz') as temp:
            ye_vocals = temp['arr_0']
        with np.load(
                target_path + '/accompaniment/' + file[:-9] + '.npz') as temp:
            ye_accomp = temp['arr_0']
def test_info():
    """Reading must accept an ``Info`` object built beforehand for the same file."""
    example_path = stempeg.example_stem_path()
    file_info = stempeg.Info(example_path)
    # only successful completion of the read is checked
    _stems, _rate = stempeg.read_stems(example_path, info=file_info)
def test_outtype(dtype):
    """The array read with ``out_type=dtype`` must have exactly that dtype."""
    example_path = stempeg.example_stem_path()
    loaded, _ = stempeg.read_stems(example_path, out_type=dtype)
    assert loaded.dtype == dtype
def test_shape():
    """Check the array layout of the example stem file.

    Expected: 5 entries on axis 0, more than 200000 samples on axis 1 in a
    multiple of 1024, and 2 entries on axis 2.
    """
    loaded, _ = stempeg.read_stems(stempeg.example_stem_path())
    stem_count, sample_count, channel_count = (
        loaded.shape[0], loaded.shape[1], loaded.shape[2]
    )
    assert stem_count == 5
    assert ((sample_count % 1024) == 0 and sample_count > 200000)
    assert channel_count == 2
def test_stem_id():
    """Request each stem of the example file individually.

    The full file is read first to learn the number of stems; every stem
    read through ``stem_id`` must come back as a 2-D array.
    """
    all_stems, _ = stempeg.read_stems(stempeg.example_stem_path())
    stem_count = all_stems.shape[0]
    for stem_index in range(stem_count):
        single_stem, _ = stempeg.read_stems(
            stempeg.example_stem_path(),
            stem_id=stem_index,
        )
        assert single_stem.ndim == 2
import stempeg # 0 - The mixture, # 1 - The drums, # 2 - The bass, # 3 - The rest of the accompaniment, # 4 - The vocals. # example S, rate = stempeg.read_stems(stempeg.example_stem_path()) stempeg.write_stems( "output.mp4", S, sample_rate=rate, writer=stempeg.StreamsWriter()) S, rate = stempeg.read_stems("C:/Users/hahla/Downloads/output.mp4", stem_id=[0]) stems_folder = "D:/Development/github/GAN-tests/audio_files_split/audio_files_001" filename_mix = "D:/Development/github/GAN-tests/audio_files_split/audio_file_mixture_0002.wav" filename_drums = "D:/Development/github/GAN-tests/audio_files_split/audio_file_hits_0002.wav" filename_bass = "D:/Development/github/GAN-tests/audio_files_split/audio_file_soundless_audio_0002.wav" filename_other = "D:/Development/github/GAN-tests/audio_files_split/audio_file_background_0002.wav" filename_vocals = "D:/Development/github/GAN-tests/audio_files_split/audio_file_soundless_audio_0002.wav" S_filename_mix, rate = stempeg.read_stems(filename_mix, stem_id=0)