def __wavenet_encode(self, audio):
    """Encode a waveform with the WaveNet autoencoder and post-process it.

    Singleton dimensions are squeezed out of ``audio``. If the squeezed
    array still has more than one dimension, the size of its second axis is
    passed to ``fastgen.encode`` as the sample length; otherwise the number
    of samples is. The raw encoding is then run through
    ``self.__std_dev_mean_noise`` and that result is returned.
    """
    # Model weights.
    weights = '../wavenet-ckpt/model.ckpt-200000'

    # One clip is encoded per call; passing a batch of audio to fastgen
    # would be quicker.
    samples = np.squeeze(audio)
    is_multi_dim = len(samples.shape) > 1
    n_samples = samples.shape[1] if is_multi_dim else len(samples)
    latent = fastgen.encode(samples, weights, n_samples)

    return self.__std_dev_mean_noise(latent)
def wavenet_encode(self, file_path, **kwargs):
    """Encode one audio file with the pretrained WaveNet autoencoder.

    Args:
        file_path: Path handed to ``utils.load_audio``.
        **kwargs: Accepted but not used.

    Returns:
        The encoding from ``fastgen.encode`` reshaped to ``(-1, 16)``.

    Raises:
        Exception: If ``../../Pretrained_models/wavenet-ckpt/`` does not
            exist.
    """
    # Guard first: without the pretrained weights nothing else can run.
    if not os.path.exists('../../Pretrained_models/wavenet-ckpt/'):
        raise Exception(
            'you should download pretrained model to pretrained_models folder make prediction, the link is: http://download.magenta.tensorflow.org/models/nsynth/wavenet-ckpt.tar'
        )
    weights = '../../Pretrained_models/wavenet-ckpt/model.ckpt-200000'

    # Load and downsample the audio.
    target_rate = 16000
    samples = utils.load_audio(file_path, sample_length=400000,
                               sr=target_rate)

    # First half of the autoencoder; batching would be quicker.
    latent = fastgen.encode(samples, weights, len(samples))

    # Reshape to a single sound.
    return latent.reshape((-1, 16))
def encode(wav_filenames: List[str],
           checkpoint: str = "checkpoints/wavenet-ckpt/model.ckpt-200000",
           sample_length: int = 16000,
           sample_rate: int = 16000,
           sounds_dir: str = "sounds") -> List[np.ndarray]:
    """
    Encodes the list of filenames to encodings by loading the wav files,
    encoding them using fastgen, and returning the result.

    :param wav_filenames: the list of filenames to encode, they need to be
        present in the ``sounds_dir`` folder
    :param checkpoint: the checkpoint path passed to ``fastgen.encode``
    :param sample_length: the sample length, can be calculated by
        multiplying the desired number of seconds by 16000
    :param sample_rate: the sample rate, should be 16000
    :param sounds_dir: the folder the filenames are resolved against
        (defaults to "sounds", the previously hard-coded value)
    :return: an empty list when no filenames are given, otherwise whatever
        ``fastgen.encode`` returns for the stacked batch
    """
    if not wav_filenames:
        return []

    # Load every file and stack the waveforms into a single batch array.
    audios = np.array([
        utils.load_audio(os.path.join(sounds_dir, wav_filename),
                         sample_length=sample_length,
                         sr=sample_rate)
        for wav_filename in wav_filenames
    ])

    # Encode the whole batch in one call.
    return fastgen.encode(audios, checkpoint, sample_length)
def load_encoding(fname, sample_lenght=None, sr=16000, ckpt='model.ckpt-200000'):
    """Load an audio file and encode it with ``fastgen.encode``.

    Args:
        fname: Path handed to ``utils.load_audio``.
        sample_lenght: Sample length passed to ``utils.load_audio``. The
            misspelt name is kept so keyword callers keep working. When
            ``None``, the length of the loaded audio is used for encoding.
        sr: Sample rate passed to ``utils.load_audio``.
        ckpt: Checkpoint path passed to ``fastgen.encode``.

    Returns:
        Tuple ``(audio, encoding)``.
    """
    audio = utils.load_audio(fname, sample_length=sample_lenght, sr=sr)
    # Never hand None to the encoder: it needs a concrete integer length,
    # so fall back to the number of samples actually loaded.
    encode_length = len(audio) if sample_lenght is None else sample_lenght
    encoding = fastgen.encode(audio, ckpt, encode_length)
    return audio, encoding
def wavenet_encode(file_path):
    """Load ``file_path`` at 16 kHz and return its encoding as ``(-1, 16)``.

    The audio is loaded through ``utils.load_audio`` with a sample length
    of 400000 and encoded with the checkpoint at
    ``../wavenet-ckpt/model.ckpt-200000``.
    """
    samples = utils.load_audio(file_path, sample_length=400000, sr=16000)
    latent = fastgen.encode(samples,
                            '../wavenet-ckpt/model.ckpt-200000',
                            len(samples))
    return latent.reshape((-1, 16))
def main(unused_argv=None):
    """Synthesize audio for a folder (or single file) of .wav / .npy inputs.

    Paths, batch size, sample length and GPU number are read from ``FLAGS``.
    Inputs are processed one batch at a time: .wav batches are encoded with
    ``fastgen.encode`` first, .npy batches are used as encodings directly,
    and each batch is written as ``gen_<name>.wav`` under the save path.

    Args:
        unused_argv: Ignored.

    Raises:
        RuntimeError: If no save path is given, or a source folder holds
            neither .wav nor .npy files.
    """
    os.environ["CUDA_VISIBLE_DEVICES"] = str(FLAGS.gpu_number)
    source_path = utils.shell_path(FLAGS.source_path)
    checkpoint_path = utils.shell_path(FLAGS.checkpoint_path)
    save_path = utils.shell_path(FLAGS.save_path)
    if not save_path:
        raise RuntimeError("Must specify a save_path.")
    tf.logging.set_verbosity(FLAGS.log)

    # Generate from wav files
    if tf.gfile.IsDirectory(source_path):
        files = tf.gfile.ListDirectory(source_path)
        exts = [os.path.splitext(f)[1] for f in files]
        if ".wav" in exts:
            postfix = ".wav"
        elif ".npy" in exts:
            postfix = ".npy"
        else:
            raise RuntimeError("Folder must contain .wav or .npy files.")
        postfix = ".npy" if FLAGS.npy_only else postfix
        files = sorted([
            os.path.join(source_path, fname)
            for fname in files
            if fname.lower().endswith(postfix)
        ])
    elif source_path.lower().endswith((".wav", ".npy")):
        # Single file: bind postfix here too, otherwise the encode step
        # below fails with an unbound local variable.
        postfix = os.path.splitext(source_path.lower())[1]
        files = [source_path]
    else:
        # Nothing usable; the batch loop below simply does not run.
        files = []

    # Now synthesize from files one batch at a time
    batch_size = FLAGS.batch_size
    sample_length = FLAGS.sample_length
    n = len(files)
    for start in range(0, n, batch_size):
        end = start + batch_size
        batch_files = files[start:end]
        save_names = [
            os.path.join(
                save_path,
                "gen_" + os.path.splitext(os.path.basename(f))[0] + ".wav")
            for f in batch_files
        ]
        print('loading batch..')
        batch_data = fastgen.load_batch(batch_files,
                                        sample_length=sample_length)
        # Encode waveforms; .npy inputs are already encodings.
        if postfix == ".npy":
            encodings = batch_data
        else:
            encodings = fastgen.encode(
                batch_data, checkpoint_path, sample_length=sample_length)
        if FLAGS.gpu_number != 0:
            with tf.device("/device:GPU:%d" % FLAGS.gpu_number):
                fastgen.synthesize(encodings, save_names,
                                   checkpoint_path=checkpoint_path)
        else:
            fastgen.synthesize(encodings, save_names,
                               checkpoint_path=checkpoint_path)
def load_encoding(fname, sample_length=None, sr=16000, ckpt='model.ckpt-200000'):
    """Load a sound, encode it, and save the encoding as a ``.npy`` file.

    The encoding is written to ``<name>.npy`` in the current directory,
    where ``<name>`` is everything after the last ``/`` in ``fname``.

    Returns:
        Tuple ``(audio, encoding)``.
    """
    # Sound loading: the encoder is always given the loaded length.
    audio = utils.load_audio(fname, sample_length=sample_length, sr=sr)
    n_samples = audio.shape[0]
    seconds = n_samples / float(sr)
    print('{} samples, {} seconds'.format(n_samples, seconds))

    # Encoding.
    encoding = fastgen.encode(audio, ckpt, n_samples)
    print("(batch_size, time_steps, dimensions) :",encoding.shape)

    tail = fname.rsplit('/', 1)[-1]
    np.save(tail + '.npy', encoding)
    return audio, encoding
def nsynth_encode(wav_path):
    """Load ``wav_path`` at 16 kHz via ``lr.load`` and return its encoding.

    Returns:
        The result of ``fastgen.encode`` reshaped to ``(-1, 16)``.
    """
    checkpoint = './wavenet-ckpt/model.ckpt-200000'

    # lr.load returns a pair; only the waveform (first item) is used.
    loaded = lr.load(wav_path, sr=16000)
    waveform = loaded[0]

    latent = fastgen.encode(waveform, checkpoint, waveform.shape[0])
    return latent.reshape((-1, 16))
def wavenet_encode(audio):
    """Encode an in-memory waveform and reshape the result to ``[-1, 16]``.

    ``len(audio)`` is passed to ``fastgen.encode`` as the sample length.
    Encoding one clip per call is slower than passing a batch to fastgen.
    """
    weights = './wavenet-ckpt/model.ckpt-200000'
    n_samples = len(audio)
    latent = fastgen.encode(audio, weights, n_samples)
    # Collapse to a single sound.
    return latent.reshape([-1, 16])
def main(unused_argv=None):
    """Synthesize audio for a folder (or single file) of .wav / .npy inputs.

    Configuration comes from ``FLAGS``. Files are handled in batches of
    ``FLAGS.batch_size``: .wav batches are encoded with ``fastgen.encode``,
    .npy batches are treated as ready-made encodings, and the synthesized
    audio is saved as ``gen_<name>.wav`` under ``FLAGS.save_path``.

    Args:
        unused_argv: Ignored.

    Raises:
        RuntimeError: If no save path is given, or a source folder holds
            neither .wav nor .npy files.
    """
    os.environ["CUDA_VISIBLE_DEVICES"] = str(FLAGS.gpu_number)
    source_path = utils.shell_path(FLAGS.source_path)
    checkpoint_path = utils.shell_path(FLAGS.checkpoint_path)
    save_path = utils.shell_path(FLAGS.save_path)
    if not save_path:
        raise RuntimeError("Must specify a save_path.")
    tf.logging.set_verbosity(FLAGS.log)

    # Generate from wav files
    if tf.gfile.IsDirectory(source_path):
        files = tf.gfile.ListDirectory(source_path)
        exts = [os.path.splitext(f)[1] for f in files]
        if ".wav" in exts:
            postfix = ".wav"
        elif ".npy" in exts:
            postfix = ".npy"
        else:
            raise RuntimeError("Folder must contain .wav or .npy files.")
        postfix = ".npy" if FLAGS.npy_only else postfix
        files = sorted([
            os.path.join(source_path, fname)
            for fname in files
            if fname.lower().endswith(postfix)
        ])
    elif source_path.lower().endswith((".wav", ".npy")):
        # A single file must define postfix as well; it was previously
        # left unbound, which broke the encode step for this case.
        postfix = os.path.splitext(source_path.lower())[1]
        files = [source_path]
    else:
        # No usable input: the loop below does nothing.
        files = []

    # Now synthesize from files one batch at a time
    batch_size = FLAGS.batch_size
    sample_length = FLAGS.sample_length
    n = len(files)
    for start in range(0, n, batch_size):
        end = start + batch_size
        batch_files = files[start:end]
        save_names = [
            os.path.join(
                save_path,
                "gen_" + os.path.splitext(os.path.basename(f))[0] + ".wav")
            for f in batch_files
        ]
        batch_data = fastgen.load_batch(batch_files,
                                        sample_length=sample_length)
        # Encode waveforms; .npy inputs are already encodings.
        if postfix == ".npy":
            encodings = batch_data
        else:
            encodings = fastgen.encode(
                batch_data, checkpoint_path, sample_length=sample_length)
        if FLAGS.gpu_number != 0:
            with tf.device("/device:GPU:%d" % FLAGS.gpu_number):
                fastgen.synthesize(
                    encodings, save_names, checkpoint_path=checkpoint_path)
        else:
            fastgen.synthesize(encodings, save_names,
                               checkpoint_path=checkpoint_path)
def load_encoding(_file, sample_length=None, sample_rate=16000, ckpt='model.ckpt-200000'):
    '''
    Resamples signal to <sample_rate> and truncates it to <sample_length>
    elements, then encodes it through the model <ckpt>.

    When <sample_length> is None the length of the loaded signal is used
    for encoding, so the encoder always receives a concrete integer.

    Returns a tuple (signal, encoded_signal)
    '''
    audio = utils.load_audio(_file, sample_length=sample_length, sr=sample_rate)
    # Resolve the default: None cannot be used as an encoder length.
    encode_length = len(audio) if sample_length is None else sample_length
    encoding = fastgen.encode(audio, ckpt, encode_length)
    return audio, encoding
def encode(path, filename):
    """Encode the audio at ``path`` and hand the result to ``decode``.

    The audio is loaded at 16 kHz with a sample length of 40000. Progress,
    the sample count/duration and the encoding shape are printed. Nothing
    is returned; ``decode`` receives the encoding, both arguments, the
    loaded sample count and the model path.
    """
    print('encoding..')
    rate = 16000
    model_path = '/home/paperspace/data/wavenet-ckpt/model.ckpt-200000'

    samples = utils.load_audio(path, sample_length=40000, sr=rate)
    n_samples = samples.shape[0]
    print('{} samples, {} seconds'.format(n_samples, n_samples / float(rate)))

    latent = fastgen.encode(samples, model_path, n_samples)
    print(latent.shape)
    print('finished encoding..')

    decode(latent, path, filename, n_samples, model_path)
def encode(paths: List[str],
           sample_length: int = 16000,
           sample_rate: int = 16000,
           checkpoint: str = "checkpoints/wavenet-ckpt/model.ckpt-200000") \
        -> np.ndarray:
    """Load every path with ``utils.load_audio`` and encode them as a batch.

    :param paths: audio file paths, loaded in order
    :param sample_length: sample length used for loading and encoding
    :param sample_rate: sample rate used for loading
    :param checkpoint: checkpoint path passed to ``fastgen.encode``
    :return: the result of ``fastgen.encode`` on the stacked waveforms
    """
    batch = np.array([
        utils.load_audio(path, sample_length=sample_length, sr=sample_rate)
        for path in paths
    ])
    return fastgen.encode(batch, checkpoint, sample_length)
def load_encoding(fname, sample_length=None, sr=16000, ckpt='model.ckpt-200000'):
    """Load ``fname``, encode it, save the encoding, and return both.

    The encoder is given the number of samples actually loaded. The
    encoding is saved under the part of ``fname`` after its last ``/``,
    with ``.npy`` appended.

    Returns:
        Tuple ``(audio, encoding)``.
    """
    # Sound loading.
    audio = utils.load_audio(fname, sample_length=sample_length, sr=sr)
    loaded_length = audio.shape[0]
    print('{} samples, {} seconds'.format(loaded_length,
                                          loaded_length / float(sr)))

    # Encoding.
    encoding = fastgen.encode(audio, ckpt, loaded_length)
    print("(batch_size, time_steps, dimensions) :", encoding.shape)

    out_name = fname.rsplit('/', 1)[-1] + '.npy'
    np.save(out_name, encoding)
    return audio, encoding
def wavenet_encode(file_path):
    """Encode one audio file and return the encoding reshaped to ``(-1, 16)``.

    The file is loaded at 16 kHz with a sample length of 400000 and
    encoded with ``./wavenet-ckpt/model.ckpt-200000``. Passing a batch to
    fastgen would be quicker than one call per file.
    """
    weights = './wavenet-ckpt/model.ckpt-200000'
    target_rate = 16000

    # Load and downsample.
    samples = utils.load_audio(file_path, sample_length=400000,
                               sr=target_rate)

    # First half of the autoencoder.
    latent = fastgen.encode(samples, weights, len(samples))

    # Single-sound layout.
    return latent.reshape((-1, 16))
def encode():
    """Encode ``./wav/mehldau-1.wav``, save the encoding, and plot it.

    The file is loaded with ``sr=44100`` and a sample length of 44100.
    The encoding is saved to ``<fname>.npy`` and drawn beneath the audio
    signal in a two-row figure.

    Returns:
        The encoding produced by ``fastgen.encode``.
    """
    # Alternative input, from
    # https://www.freesound.org/people/MustardPlug/sounds/395058/ :
    # '395058__mustardplug__breakbeat-hiphop-a4-4bar-96bpm.wav'
    fname = './wav/mehldau-1.wav'
    rate = 44100

    samples = utils.load_audio(fname, sample_length=44100, sr=rate)
    n_samples = samples.shape[0]
    print('{} samples, {} seconds'.format(n_samples, n_samples / float(rate)))

    latent = fastgen.encode(samples, './wavenet-ckpt/model.ckpt-200000',
                            n_samples)
    print(latent.shape)
    np.save(fname + '.npy', latent)

    fig, axs = plt.subplots(2, 1, figsize=(10, 5))
    top, bottom = axs[0], axs[1]
    top.plot(samples)
    top.set_title('Audio Signal')
    bottom.plot(latent[0])
    bottom.set_title('NSynth Encoding')
    return latent
def Plot_SingleFile(file_name, sampleRate):
    """Encode one file, save and plot the encoding, then resynthesize it.

    Args:
        file_name: Audio file to load. Also used to build the output names
            ``<file_name>.npy`` and ``gen_<file_name>``.
        sampleRate: Sample rate used both to load the audio and to compute
            the printed duration.
    """
    # Load at the caller's rate so the printed duration matches the data.
    # The rate was previously left at load_audio's default here.
    audio = utils.load_audio(file_name, sample_length=70000, sr=sampleRate)
    sample_length = audio.shape[0]
    print('{} samples, {} seconds'.format(sample_length,
                                          sample_length / float(sampleRate)))

    # Encoding for the new sound.
    encoding = fastgen.encode(audio, 'model.ckpt-200000', sample_length)
    print(encoding.shape)
    np.save(file_name + '.npy', encoding)

    fig, axs = plt.subplots(2, 1, figsize=(10, 5))
    axs[0].plot(audio)
    axs[0].set_title('Audio Signal')
    axs[1].plot(encoding[0])
    axs[1].set_title('NSynth Encoding')

    # Synthesis.
    fastgen.synthesize(encoding,
                       save_paths=['gen_' + file_name],
                       samples_per_save=sample_length)
def main(unused_argv=None):
    """Encode every .wav file in a folder and save one embedding per file.

    The checkpoint comes from ``FLAGS.checkpoint_path`` or, failing that,
    the latest checkpoint in ``FLAGS.expdir``. Wav files from
    ``FLAGS.source_path`` are encoded in batches of ``FLAGS.batch_size``
    and each encoding is saved as ``<name>_embeddings.npy`` under
    ``FLAGS.save_path``, which is created if missing.

    Args:
        unused_argv: Ignored.

    Exits with status 1 when the experiment dir or checkpoint is unusable.
    Errors raised while encoding a batch are logged and re-raised.
    """
    tf.logging.set_verbosity(FLAGS.log)

    if FLAGS.checkpoint_path:
        checkpoint_path = utils.shell_path(FLAGS.checkpoint_path)
    else:
        expdir = utils.shell_path(FLAGS.expdir)
        tf.logging.info("Will load latest checkpoint from %s.", expdir)
        if not tf.gfile.Exists(expdir):
            tf.logging.fatal("\tExperiment save dir '%s' does not exist!",
                             expdir)
            sys.exit(1)

        try:
            checkpoint_path = tf.train.latest_checkpoint(expdir)
        except tf.errors.NotFoundError:
            tf.logging.fatal(
                "There was a problem determining the latest checkpoint.")
            sys.exit(1)

    # An empty/None path gets the same clean exit as a missing checkpoint.
    if not checkpoint_path or not tf.train.checkpoint_exists(checkpoint_path):
        tf.logging.fatal("Invalid checkpoint path: %s", checkpoint_path)
        sys.exit(1)

    tf.logging.info("Will restore from checkpoint: %s", checkpoint_path)

    source_path = utils.shell_path(FLAGS.source_path)
    tf.logging.info("Will load Wavs from %s." % source_path)

    save_path = utils.shell_path(FLAGS.save_path)
    tf.logging.info("Will save embeddings to %s." % save_path)
    if not tf.gfile.Exists(save_path):
        tf.logging.info("Creating save directory...")
        tf.gfile.MakeDirs(save_path)

    sample_length = FLAGS.sample_length
    batch_size = FLAGS.batch_size

    def is_wav(f):
        return f.lower().endswith(".wav")

    wavfiles = sorted([
        os.path.join(source_path, fname)
        for fname in tf.gfile.ListDirectory(source_path)
        if is_wav(fname)
    ])

    for start_file in range(0, len(wavfiles), batch_size):
        # Integer division keeps the batch number an int on Python 3.
        batch_number = (start_file // batch_size) + 1
        tf.logging.info("On file number %s (batch %d).", start_file,
                        batch_number)
        end_file = start_file + batch_size
        wavefiles_batch = wavfiles[start_file:end_file]

        # Ensure that files has batch_size elements by repeating the last
        # file; only the first real_count encodings are saved below.
        real_count = len(wavefiles_batch)
        batch_filler = batch_size - real_count
        wavefiles_batch.extend(batch_filler * [wavefiles_batch[-1]])
        wav_data = np.array(
            [utils.load_audio(f, sample_length) for f in wavefiles_batch])
        try:
            tf.reset_default_graph()
            # Load up the model for encoding and find the encoding
            encoding = encode(wav_data, checkpoint_path,
                              sample_length=sample_length)
            if encoding.ndim == 2:
                encoding = np.expand_dims(encoding, 0)

            tf.logging.info("Encoding:")
            tf.logging.info(encoding.shape)
            tf.logging.info("Sample length: %d" % sample_length)

            for wavfile, enc in zip(wavefiles_batch[:real_count], encoding):
                # splitext removes only the extension. str.strip(".wav")
                # strips any of the characters '.', 'w', 'a', 'v' from
                # both ends and mangled names such as "vocal.wav".
                stem = os.path.splitext(os.path.basename(wavfile))[0]
                filename = "%s_embeddings.npy" % stem
                with tf.gfile.Open(os.path.join(save_path, filename),
                                   "w") as f:
                    np.save(f, enc)
        except Exception as e:
            tf.logging.info("Unexpected error happened: %s.", e)
            raise
def main(unused_argv=None):
    """Synthesize audio for a folder or single file of .wav / .npy inputs.

    Settings are read from ``FLAGS``. A folder is scanned for .wav files
    first, then .npy (``FLAGS.npy_only`` forces .npy); a single file is
    accepted when it ends in .wav or .npy. Inputs are processed in batches:
    .wav batches are loaded and encoded, .npy batches are loaded as
    encodings, and each batch is synthesized to ``gen_<name>.wav`` files
    under the save path.

    Args:
        unused_argv: Ignored.

    Raises:
        ValueError: If no save path is given, or ``source_path`` is neither
            a folder nor a .wav/.npy file.
        RuntimeError: If a source folder has no .wav or .npy files.
    """
    os.environ["CUDA_VISIBLE_DEVICES"] = str(FLAGS.gpu_number)
    source_path = utils.shell_path(FLAGS.source_path)
    checkpoint_path = utils.shell_path(FLAGS.checkpoint_path)
    save_path = utils.shell_path(FLAGS.save_path)
    if not save_path:
        raise ValueError("Must specify a save_path.")
    tf.logging.set_verbosity(FLAGS.log)

    if tf.gfile.IsDirectory(source_path):
        # Directory of files: prefer .wav, fall back to .npy.
        listing = tf.gfile.ListDirectory(source_path)
        seen = [os.path.splitext(entry)[1] for entry in listing]
        if ".wav" in seen:
            file_extension = ".wav"
        elif ".npy" in seen:
            file_extension = ".npy"
        else:
            raise RuntimeError("Folder must contain .wav or .npy files.")
        if FLAGS.npy_only:
            file_extension = ".npy"
        files = sorted(
            os.path.join(source_path, entry)
            for entry in listing
            if entry.lower().endswith(file_extension))
    elif source_path.lower().endswith((".wav", ".npy")):
        # Single file.
        file_extension = os.path.splitext(source_path.lower())[1]
        files = [source_path]
    else:
        raise ValueError(
            "source_path {} must be a folder or file.".format(source_path))

    # Synthesize one batch at a time.
    batch_size = FLAGS.batch_size
    sample_length = FLAGS.sample_length
    for offset in range(0, len(files), batch_size):
        batch_files = files[offset:offset + batch_size]
        save_names = []
        for path in batch_files:
            stem = os.path.splitext(os.path.basename(path))[0]
            save_names.append(os.path.join(save_path, "gen_" + stem + ".wav"))

        if file_extension == ".wav":
            # Encode waveforms.
            batch_data = fastgen.load_batch_audio(
                batch_files, sample_length=sample_length)
            encodings = fastgen.encode(
                batch_data, checkpoint_path, sample_length=sample_length)
        else:
            # Encodings were saved earlier; just load them.
            encodings = fastgen.load_batch_encodings(
                batch_files, sample_length=sample_length)

        if FLAGS.gpu_number == 0:
            # Single gpu.
            fastgen.synthesize(
                encodings, save_names, checkpoint_path=checkpoint_path)
        else:
            # Multi-gpu: pin synthesis to the requested device.
            with tf.device("/device:GPU:%d" % FLAGS.gpu_number):
                fastgen.synthesize(
                    encodings, save_names, checkpoint_path=checkpoint_path)
import os
import numpy as np
from magenta.models.nsynth import utils
from magenta.models.nsynth.wavenet import fastgen

# Encode four seconds of one file at 44.1 kHz, save the encoding, then
# resynthesize it with the same checkpoint.
filename = '/data/input/battle1.wav'
sr = 44100

audio = utils.load_audio(filename, sample_length=sr * 4, sr=sr)
sample_length = audio.shape[0]
print('{} samples, {} seconds'.format(sample_length,
                                      sample_length / float(sr)))

encoding = fastgen.encode(
    audio,
    '/data/model/wavenet-ckpt/model.ckpt-200000',
    sample_length)
print(encoding.shape)

# Saved next to the input, with '.wav' removed from the name.
np.save(filename.replace('.wav', '') + '_encoded.npy', encoding)

fastgen.synthesize(encoding,
                   save_paths=['/data/output/test.wav'],
                   samples_per_save=sample_length,
                   checkpoint_path="/data/model/wavenet-ckpt/model.ckpt-200000")
def load_encoding(fname, sample_length=None, sr=16000, ckpt=MODEL):
    """Load an audio file and encode it with ``fastgen.encode``.

    Args:
        fname: Path handed to ``utils.load_audio``.
        sample_length: Sample length passed to ``utils.load_audio``. When
            ``None``, the length of the loaded audio is used for encoding.
        sr: Sample rate passed to ``utils.load_audio``.
        ckpt: Checkpoint path; defaults to the module-level ``MODEL``.

    Returns:
        Tuple ``(audio, encoding)``.
    """
    audio = utils.load_audio(fname, sample_length=sample_length, sr=sr)
    # The encoder needs a concrete integer length, so resolve the None
    # default to the number of samples that were loaded.
    encode_length = len(audio) if sample_length is None else sample_length
    encoding = fastgen.encode(audio, ckpt, encode_length)
    return audio, encoding
from magenta.models.nsynth import utils
from magenta.models.nsynth.wavenet import fastgen
from IPython.display import Audio
#%matplotlib inline
#%config InlineBackend.figure_format = 'jpg'

# Encode one second of a track, save and plot the encoding, then
# resynthesize it.
fname = "03 Plimsoll Punks.wav"
ckpt = "model.ckpt-200000"
sr = 16000

audio = utils.load_audio(fname, sample_length=16000, sr=sr)
sample_length = audio.shape[0]
print ("{} samples , {} seconds".format(sample_length,
                                        sample_length/float(sr)))

encoding = fastgen.encode(audio, ckpt, sample_length)
print(encoding.shape)

# Output name is everything before the first "." of fname.
np.save(fname.partition(".")[0] + ".npy", encoding)

fig, axs = plt.subplots(2, 1, figsize=(10, 5))
axs[0].plot(audio)
axs[0].set_title("Audio Signal")
axs[1].plot(encoding[0])
axs[1].set_title("NSynth Encoding")

# Verify fast to generate encoding
fastgen.synthesize(encoding,
                   save_paths=["gen_" + fname],
                   samples_per_save=sample_length)
sr = 16000
def unused():
    """Notebook-style NSynth walkthrough: encode, stretch, mix and crossfade.

    Runs these steps in order, writing files into the working directory:

    1. Encode a breakbeat, save/plot the encoding and resynthesize it.
    2. Do the same for a vocal sample via the local ``load_encoding``.
    3. Time-stretch the breakbeat encoding (slower/faster) and synthesize.
    4. Average two encodings and synthesize the mix.
    5. Plot fades of an encoding and synthesize a crossfade.

    Takes no arguments and returns nothing.
    """
    # from https://www.freesound.org/people/MustardPlug/sounds/395058/
    fname = '395058__mustardplug__breakbeat-hiphop-a4-4bar-96bpm.wav'
    sr = 16000
    audio = utils.load_audio(fname, sample_length=40000, sr=sr)
    sample_length = audio.shape[0]
    print('{} samples, {} seconds'.format(sample_length,
                                          sample_length / float(sr)))

    encoding = fastgen.encode(audio, 'model.ckpt-200000', sample_length)
    print(encoding.shape)
    np.save(fname + '.npy', encoding)

    fig, axs = plt.subplots(2, 1, figsize=(10, 5))
    axs[0].plot(audio)
    axs[0].set_title('Audio Signal')
    axs[1].plot(encoding[0])
    axs[1].set_title('NSynth Encoding')

    fastgen.synthesize(encoding, save_paths=['gen_' + fname],
                       samples_per_save=sample_length)
    synthesis = utils.load_audio('gen_' + fname,
                                 sample_length=sample_length, sr=sr)

    def load_encoding(fname, sample_length=None, sr=16000,
                      ckpt='model.ckpt-200000'):
        """Load ``fname`` and return ``(audio, encoding)``."""
        audio = utils.load_audio(fname, sample_length=sample_length, sr=sr)
        # Give the encoder a concrete length even for the None default.
        encode_length = len(audio) if sample_length is None else sample_length
        encoding = fastgen.encode(audio, ckpt, encode_length)
        return audio, encoding

    # from https://www.freesound.org/people/maurolupo/sounds/213259/
    fname = '213259__maurolupo__girl-sings-laa.wav'
    sample_length = 32000
    audio, encoding = load_encoding(fname, sample_length)
    fastgen.synthesize(encoding, save_paths=['gen_' + fname],
                       samples_per_save=sample_length)
    synthesis = utils.load_audio('gen_' + fname,
                                 sample_length=sample_length, sr=sr)

    # use image interpolation to stretch the encoding:
    # (pip install scikit-image)
    from skimage.transform import resize

    def timestretch(encodings, factor):
        """Resize each encoding along its first axis by ``factor``."""
        # Normalise with the range of the argument itself. This used to
        # read the enclosing ``encoding`` variable, which was only correct
        # when that same array was passed in.
        min_encoding, max_encoding = encodings.min(), encodings.max()
        span = max_encoding - min_encoding
        encodings_norm = (encodings - min_encoding) / span
        timestretches = []
        for encoding_i in encodings_norm:
            new_shape = (int(encoding_i.shape[0] * factor),
                         encoding_i.shape[1])
            stretched = resize(encoding_i, new_shape, mode='reflect')
            # Undo the normalisation.
            stretched = (stretched * span) + min_encoding
            timestretches.append(stretched)
        return np.array(timestretches)

    # from https://www.freesound.org/people/MustardPlug/sounds/395058/
    fname = '395058__mustardplug__breakbeat-hiphop-a4-4bar-96bpm.wav'
    sample_length = 40000
    audio, encoding = load_encoding(fname, sample_length)

    encoding_slower = timestretch(encoding, 1.5)
    encoding_faster = timestretch(encoding, 0.5)

    fig, axs = plt.subplots(3, 1, figsize=(10, 7), sharex=True, sharey=True)
    axs[0].plot(encoding[0])
    axs[0].set_title('Encoding (Normal Speed)')
    axs[1].plot(encoding_faster[0])
    axs[1].set_title('Encoding (Faster))')
    axs[2].plot(encoding_slower[0])
    axs[2].set_title('Encoding (Slower)')

    fastgen.synthesize(encoding_faster, save_paths=['gen_faster_' + fname])
    fastgen.synthesize(encoding_slower, save_paths=['gen_slower_' + fname])

    # Load the slowed-down result only after it has been synthesized; this
    # load used to run before the file had been written.
    audio = utils.load_audio('gen_slower_' + fname, sample_length=None, sr=sr)
    Audio(audio, rate=sr)

    sample_length = 80000

    # from https://www.freesound.org/people/MustardPlug/sounds/395058/
    aud1, enc1 = load_encoding(
        '395058__mustardplug__breakbeat-hiphop-a4-4bar-96bpm.wav',
        sample_length)

    # from https://www.freesound.org/people/xserra/sounds/176098/
    aud2, enc2 = load_encoding('176098__xserra__cello-cant-dels-ocells.wav',
                               sample_length)

    enc_mix = (enc1 + enc2) / 2.0

    fig, axs = plt.subplots(3, 1, figsize=(10, 7))
    axs[0].plot(enc1[0])
    axs[0].set_title('Encoding 1')
    axs[1].plot(enc2[0])
    axs[1].set_title('Encoding 2')
    axs[2].plot(enc_mix[0])
    axs[2].set_title('Average')

    # save_paths is a list everywhere else in this function; the bare
    # string passed here before was inconsistent with those calls.
    fastgen.synthesize(enc_mix, save_paths=['mix.wav'])

    def fade(encoding, mode='in'):
        """Scale ``encoding`` along axis 1 by a raised-cosine ramp."""
        length = encoding.shape[1]
        # np.pi replaces the truncated literal 3.1415.
        ramp = 0.5 * (1.0 - np.cos(np.pi * np.arange(length) / float(length)))
        fadein = ramp.reshape(1, -1, 1)
        if mode == 'in':
            return fadein * encoding
        return (1.0 - fadein) * encoding

    fig, axs = plt.subplots(3, 1, figsize=(10, 7))
    axs[0].plot(enc1[0])
    axs[0].set_title('Original Encoding')
    axs[1].plot(fade(enc1, 'in')[0])
    axs[1].set_title('Fade In')
    axs[2].plot(fade(enc1, 'out')[0])
    axs[2].set_title('Fade Out')

    def crossfade(encoding1, encoding2):
        """Fade ``encoding1`` out while fading ``encoding2`` in."""
        return fade(encoding1, 'out') + fade(encoding2, 'in')

    fig, axs = plt.subplots(3, 1, figsize=(10, 7))
    axs[0].plot(enc1[0])
    axs[0].set_title('Encoding 1')
    axs[1].plot(enc2[0])
    axs[1].set_title('Encoding 2')
    axs[2].plot(crossfade(enc1, enc2)[0])
    axs[2].set_title('Crossfade')

    fastgen.synthesize(crossfade(enc1, enc2), save_paths=['crossfade.wav'])
def load_encoding(fname, sample_length=None, sr=16000, ckpt='../wavenet-ckpt/model.ckpt-200000'):
    """Load an audio file and encode it with ``fastgen.encode``.

    Args:
        fname: Path handed to ``utils.load_audio``.
        sample_length: Sample length passed to ``utils.load_audio``. When
            ``None``, the length of the loaded audio is used for encoding.
        sr: Sample rate passed to ``utils.load_audio``.
        ckpt: Checkpoint path passed to ``fastgen.encode``.

    Returns:
        Tuple ``(audio, encoding)``.
    """
    audio = utils.load_audio(fname, sample_length=sample_length, sr=sr)
    # None is not a usable encoder length; fall back to what was loaded.
    encode_length = len(audio) if sample_length is None else sample_length
    encoding = fastgen.encode(audio, ckpt, encode_length)
    return audio, encoding
# NOTE(review): fragment of a larger routine; the code that creates `axs`,
# `sr`, `sample_length` and the song snippets is outside this view.
axs[1].plot(SecondSong_begin)
axs[1].set_title('Second Song')
st.pyplot()  # presumably Streamlit's pyplot; confirm against the imports

# Save original snippets
output_dir = '/home/ubuntu/DeepBass/src/notebooks/'
output_name1 = 'originalend_' + FirstSong_fname + '.wav'
Save(output_dir, output_name1, FirstSong_end, sr)
output_name2 = 'originalbegin_' + SecondSong_fname + '.wav'
Save(output_dir, output_name2, SecondSong_begin, sr)
model_dir = '/home/ubuntu/DeepBass/src/notebooks/wavenet-ckpt/model.ckpt-200000'

# Create encodings (both snippets use the same checkpoint and sample
# length); the two calls are timed together.
start = time.time()
enc1 = fastgen.encode(FirstSong_end, model_dir, sample_length)
enc2 = fastgen.encode(SecondSong_begin, model_dir, sample_length)
end = time.time()
st.write('Encoding took ' + str((end - start)) + ' seconds')

# Create cross fading in the latent space
fade_type = 'LinearFade'
xfade_encoding = crossfade(enc1, enc2, fade_type)

# Plot both encodings and the crossfaded result, one per row.
fig, axs = plt.subplots(3, 1, figsize=(10, 7))
axs[0].plot(enc1[0])
axs[0].set_title('Encoding 1')
axs[1].plot(enc2[0])
axs[1].set_title('Encoding 2')
axs[2].plot(xfade_encoding[0])
axs[2].set_title('Crossfade')
padding = SAMPLE_LENGTH - len(audio_list[i]) audio_list[i] = np.pad(audio_list[i], (0, padding), 'constant') audio_list = np.array(audio_list) except Exception as e: print('Upload Cancelled') print(e) """# Encode Next we need to encode the audio. This should be relatively fast on a GPU, we will also create interpolations (the midpoints between each encoding) from which to re-synthesize audio. """ #@title Generate Encodings audio = np.array(audio_list) z = fastgen.encode(audio, ckpt_path, SAMPLE_LENGTH) print('Encoded %d files' % z.shape[0]) # Start with reconstructions z_list = [z_ for z_ in z] name_list = ['recon_' + name_ for name_ in names] # Add all the mean interpolations n = len(names) for i in range(n - 1): for j in range(i + 1, n): new_z = (z[i] + z[j]) / 2.0 new_name = 'interp_' + names[i] + '_X_'+ names[j] z_list.append(new_z) name_list.append(new_name)
def wavenet_encode(wave):
    """Encode ``wave`` and return the encoding reshaped to ``(-1, 16)``.

    The shape of the reshaped encoding is printed before returning.
    """
    # Model location.
    model_path = './wavenet-ckpt/wavenet-ckpt/model.ckpt-200000'
    latent = fastgen.encode(wave, model_path, len(wave))
    print(latent.reshape((-1, 16)).shape)
    return latent.reshape((-1, 16))
# Take the last four seconds t_len = 1 silence_len = 7 x1 = x1[:silence_len * sr] x1 = x1[-sr * t_len:] sample_length = x1.shape[0] output_dir = '/home/ubuntu/DeepBass/src/notebooks/' output_name = 'original_' + filenames[0] + '.wav' Save(output_dir, output_name, x1, sr) model_dir = '/home/ubuntu/DeepBass/src/notebooks/wavenet-ckpt/model.ckpt-200000' # Create encoding start = time.time() encoding = fastgen.encode(x1, model_dir, sample_length) end = time.time() st.write('Encoding took ' + str((end - start)) + ' seconds') st.write('Encoding shape ' + str(encoding.shape)) # Save encoding np.save(filenames[0] + '.npy', encoding) # Plot PCM and encoding fig, axs = plt.subplots(2, 1, figsize=(10, 5)) axs[0].plot(x1) axs[0].set_title('Audio Signal') axs[1].plot(encoding[0]) axs[1].set_title('NSynth Encoding') st.pyplot()
def main(unused_argv=None):
    """Synthesize ``gen_<name>.wav`` files from .wav or .npy inputs.

    All settings come from ``FLAGS``. The source is either a folder
    (scanned for .wav, then .npy; ``FLAGS.npy_only`` forces .npy) or a
    single .wav/.npy file. Work is done in batches of
    ``FLAGS.batch_size``: waveforms are encoded first, saved encodings are
    loaded directly, then each batch is synthesized.

    Args:
        unused_argv: Ignored.

    Raises:
        ValueError: If no save path is given, or ``source_path`` is neither
            a folder nor a .wav/.npy file.
        RuntimeError: If a source folder has no .wav or .npy files.
    """
    os.environ["CUDA_VISIBLE_DEVICES"] = str(FLAGS.gpu_number)
    source_path = utils.shell_path(FLAGS.source_path)
    checkpoint_path = utils.shell_path(FLAGS.checkpoint_path)
    save_path = utils.shell_path(FLAGS.save_path)
    if not save_path:
        raise ValueError("Must specify a save_path.")
    tf.logging.set_verbosity(FLAGS.log)

    if tf.gfile.IsDirectory(source_path):
        # Use directory of files.
        entries = tf.gfile.ListDirectory(source_path)
        suffixes = [os.path.splitext(name)[1] for name in entries]
        if ".wav" in suffixes:
            file_extension = ".wav"
        elif ".npy" in suffixes:
            file_extension = ".npy"
        else:
            raise RuntimeError("Folder must contain .wav or .npy files.")
        if FLAGS.npy_only:
            file_extension = ".npy"
        matching = [name for name in entries
                    if name.lower().endswith(file_extension)]
        files = sorted(os.path.join(source_path, name) for name in matching)
    elif source_path.lower().endswith((".wav", ".npy")):
        # Use a single file.
        file_extension = os.path.splitext(source_path.lower())[1]
        files = [source_path]
    else:
        raise ValueError(
            "source_path {} must be a folder or file.".format(source_path))

    # Now synthesize from files one batch at a time.
    batch_size = FLAGS.batch_size
    sample_length = FLAGS.sample_length
    total = len(files)
    for first in range(0, total, batch_size):
        batch_files = files[first:first + batch_size]
        save_names = [
            os.path.join(
                save_path,
                "gen_" + os.path.splitext(os.path.basename(path))[0] + ".wav")
            for path in batch_files
        ]

        needs_encoding = file_extension == ".wav"
        if needs_encoding:
            # Encode waveforms.
            batch_data = fastgen.load_batch_audio(
                batch_files, sample_length=sample_length)
            encodings = fastgen.encode(
                batch_data, checkpoint_path, sample_length=sample_length)
        else:
            # Or load encodings.
            encodings = fastgen.load_batch_encodings(
                batch_files, sample_length=sample_length)

        if FLAGS.gpu_number == 0:
            # Single gpu.
            fastgen.synthesize(encodings, save_names,
                               checkpoint_path=checkpoint_path)
        else:
            # Synthesize multi-gpu.
            with tf.device("/device:GPU:%d" % FLAGS.gpu_number):
                fastgen.synthesize(encodings, save_names,
                                   checkpoint_path=checkpoint_path)
import os
import numpy as np
from magenta.models.nsynth import utils
from magenta.models.nsynth.wavenet import fastgen

# Encode a short clip, save the encoding, resynthesize it and load the
# generated file back in.
fname = 'aggression.wav'
sr = 16000

audio = utils.load_audio(fname, sample_length=40000, sr=sr)
sample_length = audio.shape[0]
print('{} samples, {} seconds'.format(sample_length,
                                      sample_length / float(sr)))

# The checkpoint is resolved to an absolute path before encoding.
encoding = fastgen.encode(
    audio,
    os.path.abspath('model.ckpt-200000'),
    sample_length)
print(encoding.shape)
np.save(fname + '.npy', encoding)

fastgen.synthesize(encoding,
                   save_paths=['gen_' + fname],
                   samples_per_save=sample_length)

sr = 16000
synthesis = utils.load_audio('gen_' + fname,
                             sample_length=sample_length,
                             sr=sr)
print('Magenta Test')
def encode(self):
    """Encode ``self.audio`` and store the result in ``self.encoding``.

    Uses ``self.ckpt`` as the checkpoint and ``self.sample_length`` as the
    sample length. Returns nothing.
    """
    result = fastgen.encode(self.audio, self.ckpt, self.sample_length)
    self.encoding = result
def load_encoding(fname):
    """Load ``fname`` and encode it, printing progress messages.

    The sample length (``sl``), sample rate (``sr``) and checkpoint
    (``ckpt``) are read from names defined outside this function.

    Returns:
        Tuple ``(audio, encoding)``.
    """
    audio = utils.load_audio(fname, sample_length=sl, sr=sr)
    # Single-argument print(...) is valid on Python 2 and 3 alike. The two
    # spaces reproduce what the old statement `print 'Encoding.. ', fname`
    # emitted (its own trailing space plus the item separator).
    print('Encoding..  {}'.format(fname))
    encoding = fastgen.encode(audio, ckpt, sl)
    print('Encoded successfully')
    return audio, encoding