def infer(self, path, fft_window_size, phase_iterations=10,
          learn_phase=False, channels=1):
    console.log("Attempting to isolate vocals from", path)
    audio, sample_rate = conversion.load_audio_file(path)
    spectrogram = conversion.audio_file_to_spectrogram(
        audio, fft_window_size=fft_window_size,
        learn_phase=self.config.learn_phase)
    console.log("Retrieved spectrogram; processing...")

    info = self.process_spectrogram(spectrogram, channels)
    spectrogram, new_spectrogram = info
    console.log("reconverting to audio")

    # save original spectrogram as image
    path_parts = os.path.split(path)
    filename_parts = os.path.splitext(path_parts[1])
    conversion.save_spectrogram(
        spectrogram,
        os.path.join(path_parts[0], filename_parts[0]) + ".png")

    # save network output
    self.save_audio(new_spectrogram, fft_window_size, phase_iterations,
                    sample_rate, path,
                    vocal=not self.config.instrumental,
                    learn_phase=learn_phase)

    # save difference
    self.save_audio(spectrogram - new_spectrogram, fft_window_size,
                    phase_iterations, sample_rate, path,
                    vocal=self.config.instrumental,
                    learn_phase=learn_phase)
    console.log("Vocal isolation complete")
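# process_spectrogram() is defined elsewhere in this class; a hedged
# sketch of what it plausibly does for the single-channel case, mirroring
# the inline pad/predict/crop logic of isolate_vocals below. The function
# name, the `grid` parameter, and the Keras-style model.predict() call are
# illustrative assumptions, not this repo's actual API.
def process_spectrogram_sketch(model, spectrogram, grid=64):
    import numpy as np
    freq, time = spectrogram.shape[0], spectrogram.shape[1]
    # zero-pad each axis up to the next multiple of the network grid
    padded = np.zeros((int(np.ceil(freq / float(grid))) * grid,
                       int(np.ceil(time / float(grid))) * grid))
    padded[:freq, :time] = spectrogram
    # add batch and channel axes, run the model, crop the padding back off
    predicted = model.predict(
        padded[np.newaxis, :, :, np.newaxis])[0, :, :, 0]
    return spectrogram, predicted[:freq, :time]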
def isolate_vocals(self, path, fft_window_size, phase_iterations=10):
    console.log("Attempting to isolate vocals from", path)
    start_time = time.time()
    audio, sample_rate = conversion.load_audio(path)
    spectrogram, phase = conversion.audio_to_spectrogram(
        audio, fft_window_size=fft_window_size, sr=sample_rate)
    # spectrogram, phase = conversion.isolate_vocal_simple(
    #     audio, fft_window_size=fft_window_size, sr=sample_rate)
    console.log("Retrieved spectrogram; processing...")

    # pad the spectrogram so its dimensions divide evenly by the network's
    # total downscale factor, then add batch and channel axes
    expanded_spectrogram = conversion.expand_to_grid(
        spectrogram, self.peak_downscale_factor)
    expanded_spectrogram_with_batch_channels = expanded_spectrogram[
        np.newaxis, :, :, np.newaxis]
    predicted_spectrogram_with_batch_channels = self.model.predict(
        expanded_spectrogram_with_batch_channels)
    predicted_spectrogram = predicted_spectrogram_with_batch_channels[
        0, :, :, 0]
    # crop the padding back off to match the original spectrogram shape
    new_spectrogram = predicted_spectrogram[:spectrogram.shape[0],
                                            :spectrogram.shape[1]]
    console.log("Processed spectrogram; reconverting to audio")

    new_audio = conversion.spectrogram_to_audio(
        new_spectrogram, fft_window_size=fft_window_size,
        phase_iterations=phase_iterations)
    path_parts = os.path.split(path)
    filename_parts = os.path.splitext(path_parts[1])
    output_filename_base = os.path.join(path_parts[0],
                                        filename_parts[0] + "_acapella")
    console.log("Converted to audio; writing to", output_filename_base)

    conversion.save_audio(new_audio, output_filename_base + ".wav",
                          sample_rate)
    conversion.save_spectrogram(new_spectrogram,
                                output_filename_base + ".png")
    conversion.save_spectrogram(
        spectrogram,
        os.path.join(path_parts[0], filename_parts[0]) + ".png")
    # console.log("Vocal isolation complete 👌")
    print("execution time: {}".format(time.time() - start_time))
    return new_audio
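# conversion.audio_to_spectrogram() returns a (magnitude, phase) pair; a
# minimal sketch of that split using librosa's STFT (librosa.stft is a
# real API; the sketch's name and the bare-bones parameters are
# illustrative, not necessarily what conversion.py does):
def audio_to_spectrogram_sketch(audio, fft_window_size):
    import librosa
    import numpy as np
    # complex STFT, split into magnitude and phase components
    stft = librosa.stft(audio, n_fft=fft_window_size)
    return np.abs(stft), np.angle(stft)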
def save_audio(self, spectrogram, fft_window_size, phase_iterations,
               sample_rate, path, vocal=True, learn_phase=False):
    part = "_vocal" if vocal else "_instrumental"
    new_audio = conversion.spectrogram_to_audio_file(
        spectrogram, fft_window_size=fft_window_size,
        phase_iterations=phase_iterations, learn_phase=learn_phase)
    path_parts = os.path.split(path)
    filename_parts = os.path.splitext(path_parts[1])
    output_filename_base = os.path.join(path_parts[0],
                                        filename_parts[0] + part)
    console.log("Converted to audio; writing to",
                output_filename_base + ".wav")
    conversion.save_audio_file(new_audio, output_filename_base + ".wav",
                               sample_rate)
    conversion.save_spectrogram(spectrogram, output_filename_base + ".png")
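# When learn_phase is off, spectrogram_to_audio_file() has to recover
# phase iteratively from magnitudes alone; a minimal sketch of that step
# using librosa's stock Griffin-Lim implementation (librosa >= 0.7),
# assuming `magnitudes` holds a linear magnitude spectrogram:
def griffin_lim_sketch(magnitudes, fft_window_size, phase_iterations=10):
    import librosa
    # each iteration alternates an inverse and a forward STFT, keeping the
    # magnitudes fixed while refining the phase estimate
    return librosa.griffinlim(magnitudes, n_iter=phase_iterations,
                              win_length=fft_window_size)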
        mashup = np.maximum(acapella, instrumental)
        # chop into slices so everything's the same size in a batch
        dim = SLICE_SIZE
        mashup_slices = chop(mashup, dim)
        acapella_slices = chop(acapella, dim)
        count += 1
        self.x.extend(mashup_slices)
        self.y.extend(acapella_slices)
        console.info("Created", count, "mashups for key", k,
                     "with", len(self.x), "total slices so far")
    # Add a "channels" channel to please the network
    self.x = np.array(self.x)[:, :, :, np.newaxis]
    self.y = np.array(self.y)[:, :, :, np.newaxis]
    # Save to file if asked
    if as_h5:
        h5f = h5py.File(h5_path, "w")
        h5f.create_dataset("x", data=self.x)
        h5f.create_dataset("y", data=self.y)
        h5f.close()


if __name__ == "__main__":
    # Simple testing code to use while developing
    console.h1("Loading Data")
    d = Data(sys.argv[1], 1536)
    console.h1("Writing Sample Data")
    # squeeze off the channels axis added above before saving
    conversion.save_spectrogram(d.x[0].squeeze(), "x_sample_0.png")
    conversion.save_spectrogram(d.y[0].squeeze(), "y_sample_0.png")
    audio = conversion.spectrogram_to_audio(d.x[0].squeeze(), 1536)
    conversion.save_audio(audio, "x_sample.wav", 22050)
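# chop() tiles a spectrogram into non-overlapping SLICE_SIZE-square
# pieces; a minimal sketch, assuming the leftover edge that doesn't fill a
# full slice is simply discarded (the name chop_sketch is illustrative):
def chop_sketch(matrix, scale):
    slices = []
    for time in range(matrix.shape[1] // scale):
        for freq in range(matrix.shape[0] // scale):
            s = matrix[freq * scale:(freq + 1) * scale,
                       time * scale:(time + 1) * scale]
            slices.append(s)
    return slices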
    track_names = [name.encode("utf8") for name in self.track_names]
    names.create_dataset(name="track", data=track_names)
    for track in self.track_names:
        mashup.create_dataset(name=track.encode("utf8"),
                              data=self.mashup[track])
        vocal.create_dataset(name=track.encode("utf8"),
                             data=self.vocal[track])
        instrumental.create_dataset(name=track.encode("utf8"),
                                    data=self.instrumental[track])
    h5f.close()


if __name__ == "__main__":
    # Simple testing code to use while developing
    console.h1("Loading Data")
    d = Data()
    console.h1("Writing Sample Data")
    if not os.path.exists("sample"):
        os.mkdir("sample")
    n = 5
    if len(sys.argv) > 2:
        n = int(sys.argv[2])
    for i in range(n):
        conversion.save_spectrogram(d.x[i].squeeze(),
                                    "sample/x_sample_{}.png".format(i))
        conversion.save_spectrogram(d.y[i].squeeze(),
                                    "sample/y_sample_{}.png".format(i))
        audio = conversion.spectrogram_to_audio_file(d.x[i].squeeze(), 1536)
        conversion.save_audio_file(audio,
                                   "sample/x_sample_{}.wav".format(i),
                                   22050)
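# The datasets written above can be read back with h5py; a minimal sketch,
# assuming group names ("names", "mashup") matching the writer's variable
# names, which are not shown being created in this excerpt:
def load_tracks_sketch(h5_path):
    import h5py
    with h5py.File(h5_path, "r") as h5f:
        # names were utf8-encoded on write, so decode them back to str
        track_names = [name.decode("utf8")
                       for name in h5f["names"]["track"][()]]
        # [()] reads each per-track dataset fully into memory
        mashups = {t: h5f["mashup"][t][()] for t in track_names}
    return track_names, mashups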