def export(srcdirname, destination_dir, format='mp3'):
    """Exports separated music into wav or mp3.

    Parameters
    ----------
    srcdirname : str
        Source directory containing separated files
    destination_dir : str
        Destination directory
    format : {'wav', 'mp3'}
        File format (codec)
    """
    destination_dir = os.path.join(destination_dir, srcdirname)
    os.makedirs(destination_dir, exist_ok=True)
    audio_adapter = get_default_audio_adapter()
    pool = Pool()
    for track in ('vocals.wav', 'other.wav', 'bass.wav',
                  'drums.wav', 'piano.wav'):
        filepath = os.path.join(directories['tmpsplit'], srcdirname, track)
        if os.path.exists(filepath):
            if format == 'wav':
                shutil.copy2(filepath, destination_dir)
            else:
                data = list(audio_adapter.load(filepath))
                instrument = track.split('.')[0]
                pool.apply_async(
                    audio_adapter.save,
                    (os.path.join(destination_dir, f'{instrument}.mp3'),
                     *data, 'mp3', '128k'))
    # close() must be called before join(), otherwise join() raises ValueError
    pool.close()
    pool.join()
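# Usage sketch for export() above (hypothetical names: assumes
# directories['tmpsplit']/mysong already holds the separated stems):
# export('mysong', '/tmp/exports', format='mp3')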
def ssp(file_path, sr, cache, out_dir):
    separator = Separator('spleeter:5stems', multiprocess=False)
    audio_loader = get_default_audio_adapter()
    file_path = os.path.join(out_dir, file_path)
    waveform, _ = audio_loader.load(file_path, sample_rate=sr)
    prediction = separator.separate(waveform)
    file_name = os.path.split(file_path)[-1].split(
        f'.{filetype.guess(file_path).extension}')[0]
    cache_dir = get_save_dir(out_dir, ['ssp', file_name])
    cache_paths = [
        os.path.join(cache_dir, f'{path}.wav')
        for path in ['track', 'perc', 'bass', 'harm']
    ]
    paths_exist = check_files_exist(cache_paths)
    if cache and paths_exist:
        ssp_audios = [
            librosa.load(cache_path, sr)[0] for cache_path in cache_paths
        ]
    else:
        # mono mixes: full track, percussion, bass, and a combined harmonic stem
        audio = waveform[:, 0]
        perc = prediction['drums'][:, 0]
        bass = prediction['bass'][:, 0]
        harm = (prediction['piano'][:, 0] + prediction['vocals'][:, 0]
                + prediction['other'][:, 0])
        ssp_audios = [audio, perc, bass, harm]
    if cache and not paths_exist:
        # a bare map() is lazy in Python 3 and would never run;
        # iterate explicitly so the cache files actually get written
        for cache_path, ssp_audio in zip(cache_paths, ssp_audios):
            save_audio(cache_path, ssp_audio, sr)
    return ssp_audios
def separate_to_file(
    self,
    audio_descriptor,
    destination,
    audio_adapter=get_default_audio_adapter(),
    offset=0,
    duration=600.0,
    codec="wav",
    bitrate="128k",
    filename_format="{filename}/{instrument}.{codec}",
):
    """
    Performs source separation and exports the result to file using the
    given audio adapter.

    Filename format should be a Python formattable string that can use
    the following parameters: {instrument}, {filename} and {codec}.

    :param audio_descriptor: Describes the song to separate, used by the
        audio adapter to retrieve and load audio data. For a file-based
        audio adapter, such a descriptor would be a file path.
    :param destination: Target directory to write output to.
    :param audio_adapter: (Optional) Audio adapter to use for I/O.
    :param offset: (Optional) Offset of loaded song.
    :param duration: (Optional) Duration of loaded song.
    :param codec: (Optional) Export codec.
    :param bitrate: (Optional) Export bitrate.
    :param filename_format: (Optional) Filename format.
    """
    waveform, _ = audio_adapter.load(
        audio_descriptor,
        offset=offset,
        duration=duration,
        sample_rate=self._sample_rate,
    )
    sources = self.separate(waveform)
    filename = splitext(basename(audio_descriptor))[0]
    generated = []
    for instrument, data in sources.items():
        formatted_name = filename_format.format(
            filename=filename, instrument=instrument, codec=codec)
        path = join(destination, formatted_name)
        if path in generated:
            raise SpleeterError(
                f"Separated source path conflict : {path}, "
                "please check your filename format")
        generated.append(path)
        audio_adapter.save(path, data, self._sample_rate, codec, bitrate)
    # TODO: set to something more descriptive later
    zip_path = generate_random_filename(destination, "zip")
    with ZipFile(zip_path, "w") as zip_file:
        for output_path in generated:
            zip_file.write(output_path, arcname=basename(output_path))
    with app.app_context():
        return url_for("separated", filename=basename(zip_path))
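# Usage sketch (hypothetical paths, commented out since it needs a live
# Flask app context and a Separator instance):
# url = separator.separate_to_file(
#     'uploads/song.mp3', 'output/',
#     codec='mp3', bitrate='192k',
#     filename_format='{filename}-{instrument}.{codec}')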
def splitter(path):
    '''Takes a file name from youtube.py and separates it into 5 stems:
    vocals, drums, bass, piano, and other.'''
    separator = Separator('spleeter:5stems')
    audio_loader = get_default_audio_adapter()
    waveform, rate = audio_loader.load(path, sample_rate=None)
    prediction = separator.separate(waveform)
    # tools.clear_wavs()
    return prediction, rate
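# Usage sketch for splitter() above (hypothetical file name; reuses the
# default adapter to write each stem back to disk):
# prediction, rate = splitter('song.mp3')
# adapter = get_default_audio_adapter()
# for stem, data in prediction.items():
#     adapter.save(f'{stem}.wav', data, rate, 'wav', '128k')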
def Remove_background_music(self, inpath):
    # Use audio loader explicitly for loading audio waveform:
    # from spleeter.audio.adapter import get_default_audio_adapter
    audio_loader = get_default_audio_adapter()
    sample_rate = 44100
    # waveform, _ = audio_loader.load('/path/to/audio/file', sample_rate=sample_rate)
    waveform, _ = audio_loader.load(inpath, sample_rate=sample_rate)
    # Perform the separation (assumes a `separator` instance defined
    # elsewhere, e.g. at module level):
    prediction = separator.separate(waveform)
    print("Remove background music done!")
    # return the stems so the result is not silently discarded
    return prediction
def __init__(self, bitrate=256):
    """Default constructor.

    :param bitrate: Audio bitrate in kbps, defaults to 256
    """
    self.audio_bitrate = f'{bitrate}k'
    self.audio_format = 'mp3'
    self.sample_rate = 44100
    self.spleeter_stem = 'config/4stems-16kHz.json'
    # Use librosa backend as it is less memory intensive
    self.separator = Separator(self.spleeter_stem,
                               stft_backend='librosa',
                               multiprocess=False)
    self.audio_adapter = get_default_audio_adapter()
def separate_one_audio_on_accompaniment_and_vocals_by_spleeter(
        path_to_audio, sample_rate, output_directory):
    audio_loader = get_default_audio_adapter()
    separator = Separator('spleeter:2stems')
    # strip directory components (handles both / and \ separators)
    filename = path_to_audio.split('/')[-1].split('\\')[-1]
    waveform, _ = audio_loader.load(path_to_audio, sample_rate=sample_rate)
    # Perform the separation:
    prediction = separator.separate(waveform)
    accompaniment = prediction['accompaniment']
    vocals = prediction['vocals']
    basename_no_ext = '.'.join(filename.split('.')[:-1])
    wavfile.write(output_directory + basename_no_ext + '_accompaniment.wav',
                  sample_rate, accompaniment)
    wavfile.write(output_directory + basename_no_ext + '_vocals.wav',
                  sample_rate, vocals)
    del audio_loader, separator, waveform, prediction, accompaniment, vocals
    gc.collect()
def split_it():
    separator = Separator('spleeter:4stems')
    # Using custom configuration file.
    # separator = Separator('/path/to/config.json')
    audio_loader = get_default_audio_adapter()
    sample_rate = 44100
    # NOTE: this waveform is never used below; separate_to_file loads
    # the audio itself from the given path.
    waveform, _ = audio_loader.load('static/audio/audio.mp3',
                                    sample_rate=sample_rate)
    # Perform the separation:
    separator.separate_to_file('static/audio/audio.mp3', 'static/')
    print('done', os.getcwd())
def generate_fake_eval_dataset(path):
    """Generates a fake two-song evaluation dataset in `path`,
    with random stereo noise for each instrument track."""
    aa = get_default_audio_adapter()
    n_songs = 2
    fs = 44100
    duration = 3
    n_channels = 2
    rng = np.random.RandomState(seed=0)
    for song in range(n_songs):
        song_path = join(path, "test", f"song{song}")
        makedirs(song_path, exist_ok=True)
        for instr in ["mixture", "vocals", "bass", "drums", "other"]:
            filename = join(song_path, f"{instr}.wav")
            # centred uniform noise in [-0.5, 0.5)
            data = rng.rand(duration * fs, n_channels) - 0.5
            aa.save(filename, data, fs)
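# Usage sketch (hypothetical path):
# generate_fake_eval_dataset('/tmp/fake_musdb')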
def __init__(self, config=None):
    if config is None:
        self.audio_bitrate = '256k'
        self.audio_format = 'mp3'
        self.sample_rate = 44100
        self.spleeter_stem = 'config/4stems-16kHz.json'
    else:
        self.audio_bitrate = config['audio_bitrate']
        self.audio_format = config['audio_format']
        self.sample_rate = config['sample_rate']
        self.spleeter_stem = config['spleeter_stem']
    self.separator = Separator(self.spleeter_stem,
                               stft_backend='librosa',
                               multiprocess=False)
    self.audio_adapter = get_default_audio_adapter()
def test_separate(configuration, instruments):
    """ Test separation from raw data. """
    adapter = get_default_audio_adapter()
    waveform, _ = adapter.load(TEST_AUDIO_DESCRIPTOR)
    separator = Separator(configuration)
    prediction = separator.separate(waveform)
    assert len(prediction) == len(instruments)
    for instrument in instruments:
        assert instrument in prediction
    for instrument in instruments:
        track = prediction[instrument]
        assert not (waveform == track).all()
        for compared in instruments:
            if instrument != compared:
                assert not (track == prediction[compared]).all()
def test_separate(configuration, instruments, backend):
    """ Test separation from raw data. """
    adapter = get_default_audio_adapter()
    waveform, _ = adapter.load(TEST_AUDIO_DESCRIPTOR)
    separator = Separator(configuration, stft_backend=backend)
    prediction = separator.separate(waveform, TEST_AUDIO_DESCRIPTOR)
    assert len(prediction) == len(instruments)
    for instrument in instruments:
        assert instrument in prediction
    for instrument in instruments:
        track = prediction[instrument]
        assert waveform.shape == track.shape
        assert not np.allclose(waveform, track)
        for compared in instruments:
            if instrument != compared:
                assert not np.allclose(track, prediction[compared])
def __init__(self, videoPath, duration, before, after, multiThread,
             parent=None):
    super(separateQThread, self).__init__(parent)
    self.videoPath = videoPath
    self.duration = duration
    self.beforeCnt = int(before) // 20
    self.afterCnt = int(after) // 20
    self.separate = Separator('spleeter:2stems',
                              stft_backend='tensorflow',
                              multiprocess=multiThread)
    self.audioLoader = get_default_audio_adapter()
def test_separate(test_file, configuration, backend):
    """ Test separation from raw data. """
    instruments = MODEL_TO_INST[configuration]
    adapter = get_default_audio_adapter()
    waveform, _ = adapter.load(test_file)
    separator = Separator(configuration, stft_backend=backend,
                          multiprocess=False)
    prediction = separator.separate(waveform, test_file)
    assert len(prediction) == len(instruments)
    for instrument in instruments:
        assert instrument in prediction
    for instrument in instruments:
        track = prediction[instrument]
        assert waveform.shape[:-1] == track.shape[:-1]
        assert not np.allclose(waveform, track)
        for compared in instruments:
            if instrument != compared:
                assert not np.allclose(track, prediction[compared])
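# A plausible pytest parametrization for the test above (MODEL_TO_INST and
# the backend names are assumptions inferred from how the arguments are used):
# @pytest.mark.parametrize('configuration', MODEL_TO_INST.keys())
# @pytest.mark.parametrize('backend', ['tensorflow', 'librosa'])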
async def initiation(kutana):
    from spleeter.audio.adapter import get_default_audio_adapter
    from spleeter.separator import Separator

    environ["GITHUB_REPOSITORY"] = "bruce-willis/Audio-denoiser"
    environ["GITHUB_RELEASE"] = "v0.1"
    config_url = "https://raw.githubusercontent.com/bruce-willis/Audio-denoiser/develop/src/training/config/voice_config.json"
    config_path = "voice_config.json"
    _ = urllib.request.urlretrieve(url=config_url, filename=config_path)
    separator = Separator(config_path)
    predictor = separator._get_predictor()
    plugin.predictor = predictor
    adapter = get_default_audio_adapter()
    plugin.adapter = adapter
def execute(args):
    try:
        logger.info('Speech recognition started: {0}', args.audio_file,
                    decoration=MLogger.DECORATION_BOX)

        if not os.path.exists(args.audio_file):
            logger.error("The specified audio file path does not exist.\n{0}",
                         args.audio_file, decoration=MLogger.DECORATION_BOX)
            return False

        # Parent path (defaults to the directory containing the audio file;
        # on Colab it is created locally, so an explicit value is expected)
        base_path = str(pathlib.Path(args.audio_file).parent
                        ) if not args.parent_dir else args.parent_dir

        audio_adapter = get_default_audio_adapter()
        sample_rate = 44100
        waveform, _ = audio_adapter.load(args.audio_file,
                                         sample_rate=sample_rate)

        # Separate into vocals and accompaniment
        separator = Separator('spleeter:2stems')

        # Perform the separation:
        prediction = separator.separate(waveform)

        # Vocal data
        vocals = prediction['vocals']
        audio_adapter.save(f"{base_path}/vocals.wav", vocals,
                           separator._sample_rate, "wav", "16k")

        logger.info('Speech recognition finished: {0}', base_path,
                    decoration=MLogger.DECORATION_BOX)

        return True
    except Exception as e:
        logger.critical("An unexpected error occurred during speech recognition.",
                        e, decoration=MLogger.DECORATION_BOX)
        return False
def test_separator_backends(test_file):
    adapter = get_default_audio_adapter()
    waveform, _ = adapter.load(test_file)

    separator_lib = Separator("spleeter:2stems", stft_backend="librosa")
    separator_tf = Separator("spleeter:2stems", stft_backend="tensorflow")

    # Test that the stft and inverse stft provide exact reconstruction
    stft_matrix = separator_lib._stft(waveform)
    reconstructed = separator_lib._stft(stft_matrix, inverse=True,
                                        length=waveform.shape[0])
    assert np.allclose(reconstructed, waveform, atol=3e-2)

    # Compare both separations; they should be close
    out_tf = separator_tf._separate_tensorflow(waveform, test_file)
    out_lib = separator_lib._separate_librosa(waveform, test_file)
    for instrument in out_lib.keys():
        # test that both outputs are close everywhere
        assert np.allclose(out_tf[instrument], out_lib[instrument], atol=1e-5)
def __init__(self, config=None):
    """Default constructor.

    :param config: Separator config, defaults to None
    """
    if config is None:
        self.audio_bitrate = '256k'
        self.audio_format = 'mp3'
        self.sample_rate = 44100
        self.spleeter_stem = 'config/4stems-16kHz.json'
    else:
        self.audio_bitrate = config['audio_bitrate']
        self.audio_format = config['audio_format']
        self.sample_rate = config['sample_rate']
        self.spleeter_stem = config['spleeter_stem']
    # Use librosa backend as it is less memory intensive
    self.separator = Separator(self.spleeter_stem,
                               stft_backend='librosa',
                               multiprocess=False)
    self.audio_adapter = get_default_audio_adapter()
def __init__(self, stems: int, chunk_size=2):
    """
    Args:
        stems (int): total files to generate (2/3/5).
        chunk_size (int): duration (in minutes) of each individual
            chunk the audio is split into before separation.
            NOTE: A longer audio file takes more memory, hence
            splitting the audio is a workaround.
    """
    # the specified stem count loads a specific model,
    # hence it should be specified which model to load
    self.stems = stems

    # convert minutes to seconds
    self.chunk_size = int(chunk_size * 60)

    self._separator = Separator(f"spleeter:{self.stems}stems")

    # spleeter-specific config
    self._audio_adapter = get_default_audio_adapter()
def __init__(self, cpu_separation: bool, bitrate=256, softmask=False,
             alpha=1.0, iterations=1):
    """Default constructor.

    :param cpu_separation: whether to separate on CPU (unsupported by X-UMX)
    :param bitrate: output bitrate in kbps, defaults to 256
    """
    if cpu_separation:
        raise ValueError('X-UMX only works with GPU. Task aborted.')

    self.model_file = 'x-umx.h5'
    self.model_dir = Path('pretrained_models')
    self.model_file_path = self.model_dir / self.model_file
    self.context = 'cudnn'
    self.softmask = softmask
    self.alpha = alpha
    self.iterations = iterations
    self.bitrate = bitrate
    self.sample_rate = 44100
    self.residual_model = False
    self.audio_adapter = get_default_audio_adapter()
def generate_fake_training_dataset(path, instrument_list=["vocals", "other"]):
    """Generates a fake training dataset in path:
    - generates audio files
    - generates a csv file describing the dataset
    """
    aa = get_default_audio_adapter()
    n_songs = 2
    fs = 44100
    duration = 6
    n_channels = 2
    rng = np.random.RandomState(seed=0)
    dataset_df = pd.DataFrame(
        columns=["mix_path"]
        + [f"{instr}_path" for instr in instrument_list]
        + ["duration"])
    for song in range(n_songs):
        song_path = join(path, "train", f"song{song}")
        makedirs(song_path, exist_ok=True)
        dataset_df.loc[song, "duration"] = duration
        for instr in instrument_list + ["mix"]:
            filename = join(song_path, f"{instr}.wav")
            # centred uniform noise in [-0.5, 0.5)
            data = rng.rand(duration * fs, n_channels) - 0.5
            aa.save(filename, data, fs)
            dataset_df.loc[song, f"{instr}_path"] = join(
                "train", f"song{song}", f"{instr}.wav")
    dataset_df.to_csv(join(path, "train", "train.csv"), index=False)
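# Usage sketch (hypothetical path): build the two-song fake training set
# plus its train.csv manifest:
# generate_fake_training_dataset('/tmp/fake_dataset', ['vocals', 'drums'])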
def adapter():
    """ Target test audio adapter fixture. """
    return get_default_audio_adapter()
filename = args.input
outputname = args.output
if '/' not in outputname:
    outputname = './' + outputname
modelname = args.params_filename

# load model
print("load model")
separator = Separator(modelname, stft_backend='tensorflow')
separator._params['attack'] = 4
predictor = separator._get_predictor()

# load audio
print("load audio")
audio_loader = get_default_audio_adapter()
sample_rate = 44100
waveform, _ = audio_loader.load(filename, sample_rate=sample_rate)
print(waveform.dtype)
print("max amplitude: {}".format(np.max(np.abs(waveform))))

# compute spectrogram
print("compute stft")
frame_length = separator._params['frame_length']
frame_step = separator._params['frame_step']
with predictor.graph.as_default():
    stft_feature = tf.transpose(
        stft(tf.transpose(waveform), frame_length, frame_step,
def audio_data():
    waveform, _ = get_default_audio_adapter().load(TEST_AUDIO_DESCRIPTOR,
                                                   TEST_OFFSET,
                                                   TEST_DURATION,
                                                   TEST_SAMPLE_RATE)
    return waveform