Example #1
    def __wavenet_encode(self, audio):

        # Load the model weights.
        checkpoint_path = '../wavenet-ckpt/model.ckpt-200000'

        # Load and downsample the audio.
        # neural_sample_rate = 16000
        # audio = utils.load_audio(self._audio_dir + file_path, 
        #                          sample_length=400000, 
        #                          sr=neural_sample_rate)

        # Pass the audio through the first half of the autoencoder,
        # to get a list of latent variables that describe the sound.
        # Note that it would be quicker to pass a batch of audio
        # to fastgen. 
        audio = np.squeeze(audio)
        # print(audio.shape)
        # print(len(audio))
        if len(audio.shape) > 1:
            encoding = fastgen.encode(audio, checkpoint_path, audio.shape[1])
        else:
            encoding = fastgen.encode(audio, checkpoint_path, len(audio))

#         print("Pre: " + str(encoding.shape))
        encoding = self.__std_dev_mean_noise(encoding)
#         print("Post: " + str(encoding.shape))
        # Reshape to a single sound.
        return encoding
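The comment in this example notes that passing a batch of audio to fastgen is quicker than encoding clips one at a time. A minimal sketch of that batched call, assuming clips is a hypothetical list of equal-length mono waveforms and the same checkpoint as above:

import numpy as np
from magenta.models.nsynth.wavenet import fastgen

checkpoint_path = '../wavenet-ckpt/model.ckpt-200000'
batch = np.stack(clips)  # shape (batch_size, n_samples); clips is hypothetical
encodings = fastgen.encode(batch, checkpoint_path, batch.shape[1])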
Example #2
    def wavenet_encode(self, file_path, **kwargs):

        if os.path.exists('../../Pretrained_models/wavenet-ckpt/'):

            # Load the model weights.
            checkpoint_path = '../../Pretrained_models/wavenet-ckpt/model.ckpt-200000'
        else:
            raise Exception(
                'You should download the pretrained model into the Pretrained_models folder to make predictions; the link is: http://download.magenta.tensorflow.org/models/nsynth/wavenet-ckpt.tar'
            )

        # Load and downsample the audio.
        neural_sample_rate = 16000
        audio = utils.load_audio(file_path,
                                 sample_length=400000,
                                 sr=neural_sample_rate)

        # Pass the audio through the first half of the autoencoder,
        # to get a list of latent variables that describe the sound.
        # Note that it would be quicker to pass a batch of audio
        # to fastgen.
        encoding = fastgen.encode(audio, checkpoint_path, len(audio))

        # Reshape to a single sound.
        return encoding.reshape((-1, 16))
Example #3
def encode(wav_filenames: List[str],
           checkpoint: str = "checkpoints/wavenet-ckpt/model.ckpt-200000",
           sample_length: int = 16000,
           sample_rate: int = 16000) -> List[np.ndarray]:
    """
    Encodes the list of filenames to encodings by loading the wav files,
    encoding them using fastgen, and returning the result.

    :param wav_filenames: the list of filenames to encode; they need to be
        present in the "sounds" folder
    :param checkpoint: the checkpoint folder
    :param sample_length: the sample length, which can be calculated by
        multiplying the desired number of seconds by 16000
    :param sample_rate: the sample rate, which should be 16000
    """
    if not wav_filenames:
        return []

    # Load the audio for each filename
    audios = []
    for wav_filename in wav_filenames:
        audio = utils.load_audio(os.path.join("sounds", wav_filename),
                                 sample_length=sample_length,
                                 sr=sample_rate)
        audios.append(audio)

    # Encodes the audio for each new wav
    audios = np.array(audios)
    encodings = fastgen.encode(audios, checkpoint, sample_length)

    return encodings
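Per the docstring, a two-second encode is sample_length = 2 * 16000; a possible invocation with hypothetical filenames that would live in the "sounds" folder:

encodings = encode(["drums.wav", "cello.wav"], sample_length=2 * 16000)
print(encodings.shape)  # (2, time_steps, 16) for the NSynth WaveNet encoder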
Example #4
def load_encoding(fname,
                  sample_length=None,
                  sr=16000,
                  ckpt='model.ckpt-200000'):
    audio = utils.load_audio(fname, sample_length=sample_length, sr=sr)
    encoding = fastgen.encode(audio, ckpt, sample_length)
    return audio, encoding
Example #5
def wavenet_encode(file_path):
    neural_sample_rate = 16000
    audio = utils.load_audio(file_path,
                             sample_length=400000,
                             sr=neural_sample_rate)
    encoding = fastgen.encode(audio, '../wavenet-ckpt/model.ckpt-200000',
                              len(audio))
    return encoding.reshape((-1, 16))
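The trailing reshape((-1, 16)) merges the batch axis into the time axis, leaving one 16-dimensional latent vector per encoder frame. A numpy-only illustration of that step (shapes assume the encoder's roughly 512x temporal downsampling):

import numpy as np

dummy = np.zeros((1, 781, 16))   # stand-in for a (batch, time_steps, 16) encoding
flat = dummy.reshape((-1, 16))
print(flat.shape)                # (781, 16)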
Example #6
def main(unused_argv=None):
    os.environ["CUDA_VISIBLE_DEVICES"] = str(FLAGS.gpu_number)
    source_path = utils.shell_path(FLAGS.source_path)
    checkpoint_path = utils.shell_path(FLAGS.checkpoint_path)
    save_path = utils.shell_path(FLAGS.save_path)
    if not save_path:
        raise RuntimeError("Must specify a save_path.")
    tf.logging.set_verbosity(FLAGS.log)

    # Generate from wav files
    if tf.gfile.IsDirectory(source_path):
        files = tf.gfile.ListDirectory(source_path)
        exts = [os.path.splitext(f)[1] for f in files]
        if ".wav" in exts:
            postfix = ".wav"
        elif ".npy" in exts:
            postfix = ".npy"
        else:
            raise RuntimeError("Folder must contain .wav or .npy files.")
        postfix = ".npy" if FLAGS.npy_only else postfix
        files = sorted([
            os.path.join(source_path, fname) for fname in files
            if fname.lower().endswith(postfix)
        ])

    elif source_path.lower().endswith((".wav", ".npy")):
        files = [source_path]
    else:
        files = []

    # Now synthesize from files one batch at a time
    batch_size = FLAGS.batch_size
    sample_length = FLAGS.sample_length
    n = len(files)
    for start in range(0, n, batch_size):
        end = start + batch_size
        batch_files = files[start:end]
        save_names = [
            os.path.join(
                save_path,
                "gen_" + os.path.splitext(os.path.basename(f))[0] + ".wav")
            for f in batch_files
        ]
        print('loading batch..')
        batch_data = fastgen.load_batch(batch_files,
                                        sample_length=sample_length)
        # Encode waveforms
        encodings = batch_data if postfix == ".npy" else fastgen.encode(
            batch_data, checkpoint_path, sample_length=sample_length)
        if FLAGS.gpu_number != 0:
            with tf.device("/device:GPU:%d" % FLAGS.gpu_number):
                fastgen.synthesize(encodings,
                                   save_names,
                                   checkpoint_path=checkpoint_path)
        else:
            fastgen.synthesize(encodings,
                               save_names,
                               checkpoint_path=checkpoint_path)
Example #7
def load_encoding(fname, sample_length=None, sr=16000, ckpt='model.ckpt-200000'):
    # Sound loading.
    audio = utils.load_audio(fname, sample_length=sample_length, sr=sr)
    sample_length = audio.shape[0]
    print('{} samples, {} seconds'.format(sample_length, sample_length / float(sr)))
    # Encoding.
    encoding = fastgen.encode(audio, ckpt, sample_length)
    print("(batch_size, time_steps, dimensions) :", encoding.shape)
    np.save(fname[fname.rfind('/') + 1:] + '.npy', encoding)
    return audio, encoding
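Because the helper also writes the encoding to disk, it can be reloaded later without re-running the model; a short sketch with a hypothetical filename:

import numpy as np

encoding = np.load('guitar.wav.npy')  # hypothetical file produced by the helper
print(encoding.shape)                 # (batch_size, time_steps, dimensions)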
Example #8
def nsynth_encode(wav_path):
    # Checkpoint path
    checkpoint_path = './wavenet-ckpt/model.ckpt-200000'

    # Load Audio
    sr = 16000
    audio, _ = lr.load(wav_path, sr=sr)

    # Encode
    encoding = fastgen.encode(audio, checkpoint_path, audio.shape[0])

    return encoding.reshape((-1, 16))
Example #9
def wavenet_encode(audio):
    # Load the model weights.
    checkpoint_path = './wavenet-ckpt/model.ckpt-200000'

    # Pass the audio through the first half of the autoencoder,
    # to get a list of latent variables that describe the sound.
    # Note that it would be quicker to pass a batch of audio
    # to fastgen.
    encoding = fastgen.encode(audio, checkpoint_path, len(audio))

    # Reshape to a single sound.
    return encoding.reshape([-1, 16])
Example #10
def main(unused_argv=None):
  os.environ["CUDA_VISIBLE_DEVICES"] = str(FLAGS.gpu_number)
  source_path = utils.shell_path(FLAGS.source_path)
  checkpoint_path = utils.shell_path(FLAGS.checkpoint_path)
  save_path = utils.shell_path(FLAGS.save_path)
  if not save_path:
    raise RuntimeError("Must specify a save_path.")
  tf.logging.set_verbosity(FLAGS.log)

  # Generate from wav files
  if tf.gfile.IsDirectory(source_path):
    files = tf.gfile.ListDirectory(source_path)
    exts = [os.path.splitext(f)[1] for f in files]
    if ".wav" in exts:
      postfix = ".wav"
    elif ".npy" in exts:
      postfix = ".npy"
    else:
      raise RuntimeError("Folder must contain .wav or .npy files.")
    postfix = ".npy" if FLAGS.npy_only else postfix
    files = sorted([
        os.path.join(source_path, fname)
        for fname in files
        if fname.lower().endswith(postfix)
    ])

  elif source_path.lower().endswith((".wav", ".npy")):
    files = [source_path]
  else:
    files = []

  # Now synthesize from files one batch at a time
  batch_size = FLAGS.batch_size
  sample_length = FLAGS.sample_length
  n = len(files)
  for start in range(0, n, batch_size):
    end = start + batch_size
    batch_files = files[start:end]
    save_names = [
        os.path.join(save_path,
                     "gen_" + os.path.splitext(os.path.basename(f))[0] + ".wav")
        for f in batch_files
    ]
    batch_data = fastgen.load_batch(batch_files, sample_length=sample_length)
    # Encode waveforms
    encodings = batch_data if postfix == ".npy" else fastgen.encode(
        batch_data, checkpoint_path, sample_length=sample_length)
    if FLAGS.gpu_number != 0:
      with tf.device("/device:GPU:%d" % FLAGS.gpu_number):
        fastgen.synthesize(
            encodings, save_names, checkpoint_path=checkpoint_path)
    else:
      fastgen.synthesize(encodings, save_names, checkpoint_path=checkpoint_path)
Example #11
def load_encoding(_file,
                  sample_length=None,
                  sample_rate=16000,
                  ckpt='model.ckpt-200000'):
    '''
    Resamples the signal to <sample_rate> and truncates it to <sample_length>
    samples, then encodes it through the model checkpoint <ckpt>.
    Returns a tuple (signal, encoded_signal).
    '''
    audio = utils.load_audio(_file,
                             sample_length=sample_length,
                             sr=sample_rate)
    encoding = fastgen.encode(audio, ckpt, sample_length)
    return audio, encoding
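Following the docstring, a plausible call that resamples to 16 kHz and keeps three seconds of audio (the filename is hypothetical):

audio, encoding = load_encoding('voice.wav', sample_length=3 * 16000)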
Example #12
def encode(path, filename):
    print('encoding..')
    sr = 16000
    audio = utils.load_audio(path, sample_length=40000, sr=sr)
    sample_length = audio.shape[0]
    print('{} samples, {} seconds'.format(sample_length,
                                          sample_length / float(sr)))

    model_path = '/home/paperspace/data/wavenet-ckpt/model.ckpt-200000'
    encoding = fastgen.encode(audio, model_path, sample_length)
    print(encoding.shape)
    print('finished encoding..')
    # np.save(fname + '.npy', encoding)
    decode(encoding, path, filename, sample_length, model_path)
Example #13
def encode(paths: List[str],
           sample_length: int = 16000,
           sample_rate: int = 16000,
           checkpoint: str = "checkpoints/wavenet-ckpt/model.ckpt-200000") \
    -> np.ndarray:
    audios = []
    for path in paths:
        audio = utils.load_audio(path,
                                 sample_length=sample_length,
                                 sr=sample_rate)
        audios.append(audio)
    audios = np.array(audios)
    encodings = fastgen.encode(audios, checkpoint, sample_length)
    return encodings
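A hedged sketch of the usual next step, feeding these batch encodings back into fastgen.synthesize as other examples in this collection do (filenames and output names are hypothetical):

encodings = encode(["a.wav", "b.wav"])
fastgen.synthesize(encodings,
                   save_paths=["gen_a.wav", "gen_b.wav"],
                   checkpoint_path="checkpoints/wavenet-ckpt/model.ckpt-200000")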
Example #14
def load_encoding(fname,
                  sample_length=None,
                  sr=16000,
                  ckpt='model.ckpt-200000'):
    # Sound loading.
    audio = utils.load_audio(fname, sample_length=sample_length, sr=sr)
    sample_length = audio.shape[0]
    print('{} samples, {} seconds'.format(sample_length,
                                          sample_length / float(sr)))
    # Encoding.
    encoding = fastgen.encode(audio, ckpt, sample_length)
    print("(batch_size, time_steps, dimensions) :", encoding.shape)
    np.save(fname[fname.rfind('/') + 1:] + '.npy', encoding)
    return audio, encoding
Example #15
def wavenet_encode(file_path):
    
    # Load the model weights.
    checkpoint_path = './wavenet-ckpt/model.ckpt-200000'
    
    # Load and downsample the audio.
    neural_sample_rate = 16000
    audio = utils.load_audio(file_path, 
                             sample_length=400000, 
                             sr=neural_sample_rate)
    
    # Pass the audio through the first half of the autoencoder,
    # to get a list of latent variables that describe the sound.
    # Note that it would be quicker to pass a batch of audio
    # to fastgen. 
    encoding = fastgen.encode(audio, checkpoint_path, len(audio))
    
    # Reshape to a single sound.
    return encoding.reshape((-1, 16))
Example #16
def encode():
    # from https://www.freesound.org/people/MustardPlug/sounds/395058/
    # fname = '395058__mustardplug__breakbeat-hiphop-a4-4bar-96bpm.wav'
    fname = './wav/mehldau-1.wav'
    sr = 44100
    audio = utils.load_audio(fname, sample_length=44100, sr=sr)
    sample_length = audio.shape[0]
    print('{} samples, {} seconds'.format(sample_length,
                                          sample_length / float(sr)))

    encoding = fastgen.encode(audio, './wavenet-ckpt/model.ckpt-200000',
                              sample_length)

    print(encoding.shape)

    np.save(fname + '.npy', encoding)

    fig, axs = plt.subplots(2, 1, figsize=(10, 5))
    axs[0].plot(audio)
    axs[0].set_title('Audio Signal')
    axs[1].plot(encoding[0])
    axs[1].set_title('NSynth Encoding')
    return encoding
Example #17
def Plot_SingleFile(file_name, sampleRate):
    audio = utils.load_audio(
        file_name,
        sample_length=70000)  # sample_length controls how long the loaded clip is.
    sample_length = audio.shape[0]
    print('{} samples, {} seconds'.format(sample_length,
                                          sample_length / float(sampleRate)))

    #Encoding for new sound part.
    encoding = fastgen.encode(audio, 'model.ckpt-200000', sample_length)
    print(encoding.shape)
    np.save(file_name + '.npy', encoding)

    fig, axs = plt.subplots(2, 1, figsize=(10, 5))
    axs[0].plot(audio)
    axs[0].set_title('Audio Signal')
    axs[1].plot(encoding[0])
    axs[1].set_title('NSynth Encoding')

    #synthesis
    fastgen.synthesize(encoding,
                       save_paths=['gen_' + file_name],
                       samples_per_save=sample_length)
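A possible call to the helper above, assuming a hypothetical flute.wav at 16 kHz; it plots the signal and its encoding, then re-synthesizes gen_flute.wav:

Plot_SingleFile('flute.wav', 16000)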
Example #18
def main(unused_argv=None):
  tf.logging.set_verbosity(FLAGS.log)

  if FLAGS.checkpoint_path:
    checkpoint_path = utils.shell_path(FLAGS.checkpoint_path)
  else:
    expdir = utils.shell_path(FLAGS.expdir)
    tf.logging.info("Will load latest checkpoint from %s.", expdir)
    if not tf.gfile.Exists(expdir):
      tf.logging.fatal("\tExperiment save dir '%s' does not exist!", expdir)
      sys.exit(1)

    try:
      checkpoint_path = tf.train.latest_checkpoint(expdir)
    except tf.errors.NotFoundError:
      tf.logging.fatal("There was a problem determining the latest checkpoint.")
      sys.exit(1)

  if not tf.train.checkpoint_exists(checkpoint_path):
    tf.logging.fatal("Invalid checkpoint path: %s", checkpoint_path)
    sys.exit(1)

  tf.logging.info("Will restore from checkpoint: %s", checkpoint_path)

  source_path = utils.shell_path(FLAGS.source_path)
  tf.logging.info("Will load Wavs from %s." % source_path)

  save_path = utils.shell_path(FLAGS.save_path)
  tf.logging.info("Will save embeddings to %s." % save_path)
  if not tf.gfile.Exists(save_path):
    tf.logging.info("Creating save directory...")
    tf.gfile.MakeDirs(save_path)

  sample_length = FLAGS.sample_length
  batch_size = FLAGS.batch_size

  def is_wav(f):
    return f.lower().endswith(".wav")

  wavfiles = sorted([
      os.path.join(source_path, fname)
      for fname in tf.gfile.ListDirectory(source_path) if is_wav(fname)
  ])

  for start_file in range(0, len(wavfiles), batch_size):
    batch_number = (start_file // batch_size) + 1
    tf.logging.info("On file number %s (batch %d).", start_file, batch_number)
    end_file = start_file + batch_size
    wavefiles_batch = wavfiles[start_file:end_file]

    # Ensure that files has batch_size elements.
    batch_filler = batch_size - len(wavefiles_batch)
    wavefiles_batch.extend(batch_filler * [wavefiles_batch[-1]])
    wav_data = np.array(
        [utils.load_audio(f, sample_length) for f in wavefiles_batch])
    try:
      tf.reset_default_graph()
      # Load up the model for encoding and find the encoding
      encoding = encode(wav_data, checkpoint_path, sample_length=sample_length)
      if encoding.ndim == 2:
        encoding = np.expand_dims(encoding, 0)

      tf.logging.info("Encoding:")
      tf.logging.info(encoding.shape)
      tf.logging.info("Sample length: %d" % sample_length)

      for num, (wavfile, enc) in enumerate(zip(wavefiles_batch, encoding)):
        filename = "%s_embeddings.npy" % wavfile.split("/")[-1].strip(".wav")
        with tf.gfile.Open(os.path.join(save_path, filename), "w") as f:
          np.save(f, enc)

        if num + batch_filler + 1 == batch_size:
          break
    except Exception as e:
      tf.logging.info("Unexpected error happened: %s.", e)
      raise
Example #19
def main(unused_argv=None):
  os.environ["CUDA_VISIBLE_DEVICES"] = str(FLAGS.gpu_number)
  source_path = utils.shell_path(FLAGS.source_path)
  checkpoint_path = utils.shell_path(FLAGS.checkpoint_path)
  save_path = utils.shell_path(FLAGS.save_path)
  if not save_path:
    raise ValueError("Must specify a save_path.")
  tf.logging.set_verbosity(FLAGS.log)

  # Use directory of files
  if tf.gfile.IsDirectory(source_path):
    files = tf.gfile.ListDirectory(source_path)
    file_extensions = [os.path.splitext(f)[1] for f in files]
    if ".wav" in file_extensions:
      file_extension = ".wav"
    elif ".npy" in file_extensions:
      file_extension = ".npy"
    else:
      raise RuntimeError("Folder must contain .wav or .npy files.")
    file_extension = ".npy" if FLAGS.npy_only else file_extension
    files = sorted([
        os.path.join(source_path, fname)
        for fname in files
        if fname.lower().endswith(file_extension)
    ])
  # Use a single file
  elif source_path.lower().endswith((".wav", ".npy")):
    file_extension = os.path.splitext(source_path.lower())[1]
    files = [source_path]
  else:
    raise ValueError(
        "source_path {} must be a folder or file.".format(source_path))

  # Now synthesize from files one batch at a time
  batch_size = FLAGS.batch_size
  sample_length = FLAGS.sample_length
  n = len(files)
  for start in range(0, n, batch_size):
    end = start + batch_size
    batch_files = files[start:end]
    save_names = [
        os.path.join(save_path,
                     "gen_" + os.path.splitext(os.path.basename(f))[0] + ".wav")
        for f in batch_files
    ]
    # Encode waveforms
    if file_extension == ".wav":
      batch_data = fastgen.load_batch_audio(
          batch_files, sample_length=sample_length)
      encodings = fastgen.encode(
          batch_data, checkpoint_path, sample_length=sample_length)
    # Or load encodings
    else:
      encodings = fastgen.load_batch_encodings(
          batch_files, sample_length=sample_length)
    # Synthesize multi-gpu
    if FLAGS.gpu_number != 0:
      with tf.device("/device:GPU:%d" % FLAGS.gpu_number):
        fastgen.synthesize(
            encodings, save_names, checkpoint_path=checkpoint_path)
    # Single gpu
    else:
      fastgen.synthesize(
          encodings, save_names, checkpoint_path=checkpoint_path)
Example #20
import os
import numpy as np
from magenta.models.nsynth import utils
from magenta.models.nsynth.wavenet import fastgen

filename = '/data/input/battle1.wav'
sr = 44100
audio = utils.load_audio(filename, sample_length=(sr * 4), sr=sr)
sample_length = audio.shape[0]

print('{} samples, {} seconds'.format(sample_length,
                                      sample_length / float(sr)))

encoding = fastgen.encode(audio, '/data/model/wavenet-ckpt/model.ckpt-200000',
                          sample_length)

print(encoding.shape)
np.save(filename.replace('.wav', '') + '_encoded.npy', encoding)

fastgen.synthesize(
    encoding,
    save_paths=['/data/output/test.wav'],
    samples_per_save=sample_length,
    checkpoint_path="/data/model/wavenet-ckpt/model.ckpt-200000")
Example #21
def load_encoding(fname, sample_length=None, sr=16000, ckpt=MODEL):
    audio = utils.load_audio(fname, sample_length=sample_length, sr=sr)
    encoding = fastgen.encode(audio, ckpt, sample_length)
    return audio, encoding
Example #22
import numpy as np
import matplotlib.pyplot as plt
from magenta.models.nsynth import utils
from magenta.models.nsynth.wavenet import fastgen
from IPython.display import Audio
#%matplotlib inline
#%config InlineBackend.figure_format = 'jpg'

fname = "03 Plimsoll Punks.wav"
ckpt = "model.ckpt-200000"
sr = 16000

audio = utils.load_audio(fname, sample_length=16000, sr=sr)
sample_length = audio.shape[0]

print ("{} samples , {} seconds".format(sample_length, sample_length/float(sr)))

encoding = fastgen.encode(audio, ckpt, sample_length)

print(encoding.shape)

np.save(fname.split(".")[0] + ".npy", encoding)

fig, axs = plt.subplots(2, 1, figsize=(10, 5))
axs[0].plot(audio);
axs[0].set_title("Audio Signal")
axs[1].plot(encoding[0]);
axs[1].set_title("NSynth Encoding")

# Verify fast to generate encoding
fastgen.synthesize(encoding, save_paths=["gen_" + fname], samples_per_save=sample_length)

sr = 16000
def unused():
    # from https://www.freesound.org/people/MustardPlug/sounds/395058/
    fname = '395058__mustardplug__breakbeat-hiphop-a4-4bar-96bpm.wav'
    sr = 16000
    audio = utils.load_audio(fname, sample_length=40000, sr=sr)
    sample_length = audio.shape[0]
    print('{} samples, {} seconds'.format(sample_length, sample_length / float(sr)))


    encoding = fastgen.encode(audio, 'model.ckpt-200000', sample_length)
    print(encoding.shape)

    np.save(fname + '.npy', encoding)

    fig, axs = plt.subplots(2, 1, figsize=(10, 5))
    axs[0].plot(audio);
    axs[0].set_title('Audio Signal')
    axs[1].plot(encoding[0]);
    axs[1].set_title('NSynth Encoding')

    fastgen.synthesize(encoding, save_paths=['gen_' + fname], samples_per_save=sample_length)

    sr = 16000
    synthesis = utils.load_audio('gen_' + fname, sample_length=sample_length, sr=sr)

    def load_encoding(fname, sample_length=None, sr=16000, ckpt='model.ckpt-200000'):
        audio = utils.load_audio(fname, sample_length=sample_length, sr=sr)
        encoding = fastgen.encode(audio, ckpt, sample_length)
        return audio, encoding

    # from https://www.freesound.org/people/maurolupo/sounds/213259/
    fname = '213259__maurolupo__girl-sings-laa.wav'
    sample_length = 32000
    audio, encoding = load_encoding(fname, sample_length)
    fastgen.synthesize(
        encoding,
        save_paths=['gen_' + fname],
        samples_per_save=sample_length)
    synthesis = utils.load_audio('gen_' + fname,
                                 sample_length=sample_length,
                                 sr=sr)

    # use image interpolation to stretch the encoding: (pip install scikit-image)
    from skimage.transform import resize

    def timestretch(encodings, factor):
        min_encoding, max_encoding = encodings.min(), encodings.max()
        encodings_norm = (encodings - min_encoding) / (max_encoding - min_encoding)
        timestretches = []
        for encoding_i in encodings_norm:
            stretched = resize(encoding_i, (int(encoding_i.shape[0] * factor), encoding_i.shape[1]), mode='reflect')
            stretched = (stretched * (max_encoding - min_encoding)) + min_encoding
            timestretches.append(stretched)
        return np.array(timestretches)

    # from https://www.freesound.org/people/MustardPlug/sounds/395058/
    fname = '395058__mustardplug__breakbeat-hiphop-a4-4bar-96bpm.wav'
    sample_length = 40000
    audio, encoding = load_encoding(fname, sample_length)

    audio = utils.load_audio('gen_slower_' + fname, sample_length=None, sr=sr)
    Audio(audio, rate=sr)

    encoding_slower = timestretch(encoding, 1.5)
    encoding_faster = timestretch(encoding, 0.5)

    fig, axs = plt.subplots(3, 1, figsize=(10, 7), sharex=True, sharey=True)
    axs[0].plot(encoding[0]);
    axs[0].set_title('Encoding (Normal Speed)')
    axs[1].plot(encoding_faster[0]);
    axs[1].set_title('Encoding (Faster)')
    axs[2].plot(encoding_slower[0]);
    axs[2].set_title('Encoding (Slower)')


    fastgen.synthesize(encoding_faster, save_paths=['gen_faster_' + fname])
    fastgen.synthesize(encoding_slower, save_paths=['gen_slower_' + fname])

    sample_length = 80000

    # from https://www.freesound.org/people/MustardPlug/sounds/395058/
    aud1, enc1 = load_encoding('395058__mustardplug__breakbeat-hiphop-a4-4bar-96bpm.wav', sample_length)

    # from https://www.freesound.org/people/xserra/sounds/176098/
    aud2, enc2 = load_encoding('176098__xserra__cello-cant-dels-ocells.wav', sample_length)

    enc_mix = (enc1 + enc2) / 2.0

    fig, axs = plt.subplots(3, 1, figsize=(10, 7))
    axs[0].plot(enc1[0]);
    axs[0].set_title('Encoding 1')
    axs[1].plot(enc2[0]);
    axs[1].set_title('Encoding 2')
    axs[2].plot(enc_mix[0]);
    axs[2].set_title('Average')

    fastgen.synthesize(enc_mix, save_paths=['mix.wav'])

    def fade(encoding, mode='in'):
        length = encoding.shape[1]
        fadein = (0.5 * (1.0 - np.cos(np.pi * np.arange(length) /
                                      float(length)))).reshape(1, -1, 1)
        if mode == 'in':
            return fadein * encoding
        else:
            return (1.0 - fadein) * encoding

    fig, axs = plt.subplots(3, 1, figsize=(10, 7))
    axs[0].plot(enc1[0]);
    axs[0].set_title('Original Encoding')
    axs[1].plot(fade(enc1, 'in')[0]);
    axs[1].set_title('Fade In')
    axs[2].plot(fade(enc1, 'out')[0]);
    axs[2].set_title('Fade Out')

    def crossfade(encoding1, encoding2):
        return fade(encoding1, 'out') + fade(encoding2, 'in')


    fig, axs = plt.subplots(3, 1, figsize=(10, 7))
    axs[0].plot(enc1[0]);
    axs[0].set_title('Encoding 1')
    axs[1].plot(enc2[0]);
    axs[1].set_title('Encoding 2')
    axs[2].plot(crossfade(enc1, enc2)[0]);
    axs[2].set_title('Crossfade')

    fastgen.synthesize(crossfade(enc1, enc2), save_paths=['crossfade.wav'])
Example #24
def load_encoding(fname, sample_length=None, sr=16000, ckpt='../wavenet-ckpt/model.ckpt-200000'):
    audio = utils.load_audio(fname, sample_length=sample_length, sr=sr)
    encoding = fastgen.encode(audio, ckpt, sample_length)
    return audio, encoding
Example #25
axs[1].plot(SecondSong_begin)
axs[1].set_title('Second Song')
st.pyplot()

# Save original snippets
output_dir = '/home/ubuntu/DeepBass/src/notebooks/'
output_name1 = 'originalend_' + FirstSong_fname + '.wav'
Save(output_dir, output_name1, FirstSong_end, sr)
output_name2 = 'originalbegin_' + SecondSong_fname + '.wav'
Save(output_dir, output_name2, SecondSong_begin, sr)

model_dir = '/home/ubuntu/DeepBass/src/notebooks/wavenet-ckpt/model.ckpt-200000'

# Create encodings
start = time.time()
enc1 = fastgen.encode(FirstSong_end, model_dir, sample_length)
enc2 = fastgen.encode(SecondSong_begin, model_dir, sample_length)
end = time.time()
st.write('Encoding took ' + str((end - start)) + ' seconds')

# Create cross fading in the latent space
fade_type = 'LinearFade'
xfade_encoding = crossfade(enc1, enc2, fade_type)

fig, axs = plt.subplots(3, 1, figsize=(10, 7))
axs[0].plot(enc1[0])
axs[0].set_title('Encoding 1')
axs[1].plot(enc2[0])
axs[1].set_title('Encoding 2')
axs[2].plot(xfade_encoding[0])
axs[2].set_title('Crossfade')
Example #26
      padding = SAMPLE_LENGTH - len(audio_list[i])
      audio_list[i] = np.pad(audio_list[i], (0, padding), 'constant')

  audio_list = np.array(audio_list)
except Exception as e:
  print('Upload Cancelled')
  print(e)

"""# Encode

Next we need to encode the audio. This should be relatively fast on a GPU. We will also create interpolations (the midpoints between each encoding) from which to re-synthesize audio.
"""

#@title Generate Encodings
audio = np.array(audio_list)
z = fastgen.encode(audio, ckpt_path, SAMPLE_LENGTH)
print('Encoded %d files' % z.shape[0])


# Start with reconstructions
z_list = [z_ for z_ in z]
name_list = ['recon_' + name_ for name_ in names]

# Add all the mean interpolations
n = len(names)
for i in range(n - 1):
  for j in range(i + 1, n):
    new_z = (z[i] + z[j]) / 2.0
    new_name = 'interp_' + names[i] + '_X_'+ names[j]
    z_list.append(new_z)
    name_list.append(new_name)
Example #27
def wavenet_encode(wave):
    model_path = './wavenet-ckpt/wavenet-ckpt/model.ckpt-200000'  # model checkpoint location
    # audio = np.load(file_path)
    encoding = fastgen.encode(wave, model_path, len(wave))
    print(encoding.reshape((-1, 16)).shape)
    return encoding.reshape((-1, 16))
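This variant takes a raw waveform rather than a file path; a minimal pairing with utils.load_audio, as in the other examples (the input path is hypothetical):

from magenta.models.nsynth import utils

wave = utils.load_audio('input.wav', sample_length=32000, sr=16000)
features = wavenet_encode(wave)  # (time_steps, 16) after the reshape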
Example #28
# Truncate to the first silence_len seconds, then keep the last t_len seconds
t_len = 1
silence_len = 7
x1 = x1[:silence_len * sr]
x1 = x1[-sr * t_len:]
sample_length = x1.shape[0]
output_dir = '/home/ubuntu/DeepBass/src/notebooks/'
output_name = 'original_' + filenames[0] + '.wav'
Save(output_dir, output_name, x1, sr)

model_dir = '/home/ubuntu/DeepBass/src/notebooks/wavenet-ckpt/model.ckpt-200000'

# Create encoding
start = time.time()
encoding = fastgen.encode(x1, model_dir, sample_length)
end = time.time()

st.write('Encoding took ' + str((end - start)) + ' seconds')
st.write('Encoding shape ' + str(encoding.shape))

# Save encoding
np.save(filenames[0] + '.npy', encoding)

# Plot PCM and encoding
fig, axs = plt.subplots(2, 1, figsize=(10, 5))
axs[0].plot(x1)
axs[0].set_title('Audio Signal')
axs[1].plot(encoding[0])
axs[1].set_title('NSynth Encoding')
st.pyplot()
Example #29
def main(unused_argv=None):
    os.environ["CUDA_VISIBLE_DEVICES"] = str(FLAGS.gpu_number)
    source_path = utils.shell_path(FLAGS.source_path)
    checkpoint_path = utils.shell_path(FLAGS.checkpoint_path)
    save_path = utils.shell_path(FLAGS.save_path)
    if not save_path:
        raise ValueError("Must specify a save_path.")
    tf.logging.set_verbosity(FLAGS.log)

    # Use directory of files
    if tf.gfile.IsDirectory(source_path):
        files = tf.gfile.ListDirectory(source_path)
        file_extensions = [os.path.splitext(f)[1] for f in files]
        if ".wav" in file_extensions:
            file_extension = ".wav"
        elif ".npy" in file_extensions:
            file_extension = ".npy"
        else:
            raise RuntimeError("Folder must contain .wav or .npy files.")
        file_extension = ".npy" if FLAGS.npy_only else file_extension
        files = sorted([
            os.path.join(source_path, fname) for fname in files
            if fname.lower().endswith(file_extension)
        ])
    # Use a single file
    elif source_path.lower().endswith((".wav", ".npy")):
        file_extension = os.path.splitext(source_path.lower())[1]
        files = [source_path]
    else:
        raise ValueError(
            "source_path {} must be a folder or file.".format(source_path))

    # Now synthesize from files one batch at a time
    batch_size = FLAGS.batch_size
    sample_length = FLAGS.sample_length
    n = len(files)
    for start in range(0, n, batch_size):
        end = start + batch_size
        batch_files = files[start:end]
        save_names = [
            os.path.join(
                save_path,
                "gen_" + os.path.splitext(os.path.basename(f))[0] + ".wav")
            for f in batch_files
        ]
        # Encode waveforms
        if file_extension == ".wav":
            batch_data = fastgen.load_batch_audio(batch_files,
                                                  sample_length=sample_length)
            encodings = fastgen.encode(batch_data,
                                       checkpoint_path,
                                       sample_length=sample_length)
        # Or load encodings
        else:
            encodings = fastgen.load_batch_encodings(
                batch_files, sample_length=sample_length)
        # Synthesize multi-gpu
        if FLAGS.gpu_number != 0:
            with tf.device("/device:GPU:%d" % FLAGS.gpu_number):
                fastgen.synthesize(encodings,
                                   save_names,
                                   checkpoint_path=checkpoint_path)
        # Single gpu
        else:
            fastgen.synthesize(encodings,
                               save_names,
                               checkpoint_path=checkpoint_path)
Example #30
import os
import numpy as np
from magenta.models.nsynth import utils
from magenta.models.nsynth.wavenet import fastgen

fname = 'aggression.wav'
sr = 16000
audio = utils.load_audio(fname, sample_length=40000, sr=sr)
sample_length = audio.shape[0]
print('{} samples, {} seconds'.format(sample_length,
                                      sample_length / float(sr)))

encoding = fastgen.encode(audio, os.path.abspath('model.ckpt-200000'),
                          sample_length)
print(encoding.shape)
np.save(fname + '.npy', encoding)

fastgen.synthesize(encoding,
                   save_paths=['gen_' + fname],
                   samples_per_save=sample_length)

sr = 16000
synthesis = utils.load_audio('gen_' + fname,
                             sample_length=sample_length,
                             sr=sr)

print('Magenta Test')
Example #31
    def encode(self):
        self.encoding = fastgen.encode(self.audio, self.ckpt, self.sample_length)
Example #32
def load_encoding(fname):
    audio = utils.load_audio(fname, sample_length=sl, sr=sr)
    print('Encoding.. ', fname)
    encoding = fastgen.encode(audio, ckpt, sl)
    print('Encoded successfully')
    return audio, encoding