Ejemplo n.º 1
0
def load_batch_audio(files, sample_length=64000):
  """Read several .wav files and collect them into one zero-padded array.

  Args:
    files: A list of filepaths to .wav files.
    sample_length: Maximum sample length; clips shorter than this are
      zero-padded on the right up to this many samples.

  Returns:
    batch: A padded array of audio [n_files, sample_length]
  """
  def _pad(clip):
    # Clips that already reach the target length are passed through as-is.
    n_samples = clip.shape[0]
    if n_samples >= sample_length:
      return clip
    buf = np.zeros([sample_length])
    buf[:n_samples] = clip
    return buf

  clips = [
      _pad(utils.load_audio(path, sample_length, sr=16000)) for path in files
  ]
  return np.array(clips)
Ejemplo n.º 2
0
    def wavenet_encode(self, file_path, **kwargs):
        """Encode one audio file with the pretrained NSynth WaveNet model.

        The checkpoint location is a relative path, so it is resolved
        against the current working directory.

        Args:
            file_path: Path of the audio file to encode.
            **kwargs: Accepted but not used by this method.

        Returns:
            The result of ``fastgen.encode`` reshaped to ``(-1, 16)``.

        Raises:
            Exception: If the pretrained checkpoint directory is missing.
        """
        # Guard clause: nothing below can run without the pretrained weights.
        if not os.path.exists('../../Pretrained_models/wavenet-ckpt/'):
            raise Exception(
                'you should download pretrained model to pretrained_models folder make prediction, the link is: http://download.magenta.tensorflow.org/models/nsynth/wavenet-ckpt.tar'
            )
        checkpoint_path = '../../Pretrained_models/wavenet-ckpt/model.ckpt-200000'

        # Load the audio at the rate used for encoding.
        neural_sample_rate = 16000
        samples = utils.load_audio(file_path,
                                   sample_length=400000,
                                   sr=neural_sample_rate)

        # Run the encoder half of the autoencoder over the whole clip.
        # Passing a batch of clips to fastgen would be quicker than one by one.
        latents = fastgen.encode(samples, checkpoint_path, len(samples))

        # Flatten the batch axis so the result describes a single sound.
        return latents.reshape((-1, 16))
Ejemplo n.º 3
0
def load_encoding(fname,
                  sample_lenght=None,
                  sr=16000,
                  ckpt='model.ckpt-200000'):
    """Load an audio file and compute its encoding.

    Args:
        fname: Path of the audio file.
        sample_lenght: Sample length forwarded to both ``utils.load_audio``
            and ``fastgen.encode`` (the spelling is part of the signature).
        sr: Sample rate passed to the loader.
        ckpt: Checkpoint path passed to the encoder.

    Returns:
        A tuple ``(audio, encoding)``.
    """
    loaded = utils.load_audio(fname, sample_length=sample_lenght, sr=sr)
    return loaded, fastgen.encode(loaded, ckpt, sample_lenght)
def encode(wav_filenames: List[str],
           checkpoint: str = "checkpoints/wavenet-ckpt/model.ckpt-200000",
           sample_length: int = 16000,
           sample_rate: int = 16000) -> List[np.ndarray]:
    """
  Loads each wav file from the "sounds" folder and encodes the whole batch
  with fastgen in a single call.

  :param wav_filenames: the list of filenames to encode, they need to be
  present in the "sounds" folder; an empty list yields an empty list
  :param checkpoint: the checkpoint path handed to fastgen
  :param sample_length: the sample length, can be calculated by multiplying
  the desired number of seconds by 16000
  :param sample_rate: the sample rate, should be 16000
  """
    # Nothing to load or encode.
    if not wav_filenames:
        return []

    # One row per file, read relative to the "sounds" folder.
    clips = np.array([
        utils.load_audio(os.path.join("sounds", name),
                         sample_length=sample_length,
                         sr=sample_rate)
        for name in wav_filenames
    ])
    return fastgen.encode(clips, checkpoint, sample_length)
Ejemplo n.º 5
0
def load_batch(files, sample_length=64000):
    """Read a list of .wav or .npy files into one zero-padded array.

    The extension of the first entry decides how every file is read.

    Args:
        files: A list of filepaths to .wav or .npy files
        sample_length: Maximum sample length (only passed to the .wav loader)
    Returns:
        batch_data: A padded array of audio or embeddings [batch, length, (dims)]
    """
    is_npy = os.path.splitext(files[0])[1] == ".npy"
    if is_npy:
        items = [np.load(path) for path in files]
    else:
        items = [utils.load_audio(path, sample_length, sr=16000)
                 for path in files]

    # Longest first-axis length in the batch; shorter entries are padded to it.
    longest = max([0] + [item.shape[0] for item in items])

    def _pad(item):
        n_rows = item.shape[0]
        if n_rows >= longest:
            return item
        if is_npy:
            out = np.zeros([longest, item.shape[1]])
            out[:n_rows, :] = item
        else:
            out = np.zeros([longest])
            out[:n_rows] = item
        return out

    return np.array([_pad(item) for item in items])
Ejemplo n.º 6
0
def load_batch_audio(files, sample_length=64000):
    """Load .wav files into a single array, zero-padding the short ones.

    Args:
      files: A list of filepaths to .wav files.
      sample_length: Maximum sample length; also the length short clips are
        padded to.

    Returns:
      batch: A padded array of audio [n_files, sample_length]
    """
    rows = []
    for path in files:
        clip = utils.load_audio(path, sample_length, sr=16000)
        row = clip
        if clip.shape[0] < sample_length:
            # Copy the clip into the front of a zero buffer of the full length.
            row = np.zeros([sample_length])
            row[:clip.shape[0]] = clip
        rows.append(row)
    return np.array(rows)
Ejemplo n.º 7
0
def decode(fname, sample_length=44100, sr=16000):
    """Synthesize audio to ``'gen_' + fname`` and load the result back.

    ``encoding`` is not a parameter: it is looked up in the enclosing
    (module) scope when the function runs.

    Args:
        fname: Name used to build the output path ``'gen_' + fname``.
        sample_length: Passed as ``samples_per_save`` to the synthesizer and
            as ``sample_length`` to the loader.
        sr: Sample rate passed to the loader.

    Returns:
        The audio loaded from the generated file.
    """
    out_path = 'gen_' + fname
    fastgen.synthesize(encoding,
                       save_paths=[out_path],
                       samples_per_save=sample_length)
    return utils.load_audio(out_path, sample_length=sample_length, sr=sr)
Ejemplo n.º 8
0
def wavenet_encode(file_path):
    """Encode one audio file and return the encoding reshaped to ``(-1, 16)``.

    Args:
        file_path: Path of the audio file to encode.

    Returns:
        The output of ``fastgen.encode`` reshaped to ``(-1, 16)``.
    """
    checkpoint = '../wavenet-ckpt/model.ckpt-200000'
    samples = utils.load_audio(file_path, sample_length=400000, sr=16000)
    latents = fastgen.encode(samples, checkpoint, len(samples))
    return latents.reshape((-1, 16))
Ejemplo n.º 9
0
def load_encoding(fname, sample_length=None, sr=16000, ckpt='model.ckpt-200000'):
    '''Load a sound, encode it, and save the encoding as a .npy file.

    The encoding is saved under the part of ``fname`` after its last '/',
    with '.npy' appended. Returns a tuple (audio, encoding).
    '''
    audio = utils.load_audio(fname, sample_length=sample_length, sr=sr)

    # From here on the length actually loaded is what gets used.
    n_samples = audio.shape[0]
    duration = n_samples / float(sr)
    print('{} samples, {} seconds'.format(n_samples, duration))

    encoding = fastgen.encode(audio, ckpt, n_samples)
    print("(batch_size, time_steps, dimensions) :", encoding.shape)

    np.save(fname.rsplit('/', 1)[-1] + '.npy', encoding)
    return audio, encoding
Ejemplo n.º 10
0
def upload(sample_length, sr):
  '''Upload files, save them under /content/, and load each as audio.

  Args:
    sample_length: Sample length passed to ``utils.load_audio``.
    sr: Sample rate passed to ``utils.load_audio``.

  Returns:
    A tuple ``(file_list, audio_list)``: the saved file paths and the audio
    loaded from each, in the same order.
  '''
  # NOTE(review): `files` is presumably google.colab.files - confirm.
  filemap = files.upload()
  file_list, audio_list = [], []
  # .items() exists on both Python 2 and Python 3 dicts; .iteritems() is
  # Python 2 only.
  for key, value in filemap.items():
    fname = os.path.join('/content/', key)
    # Binary mode: the uploaded payload is written back unchanged, and a
    # text-mode handle rejects bytes on Python 3.
    with open(fname, 'wb') as f:
      f.write(value)
    audio = utils.load_audio(fname, sample_length=sample_length, sr=sr)
    file_list.append(fname)
    audio_list.append(audio)
  return file_list, audio_list
def load_encoding(_file,
                  sample_length=None,
                  sample_rate=16000,
                  ckpt='model.ckpt-200000'):
    '''
    Loads <_file> through utils.load_audio with <sample_rate> and
    <sample_length>, then encodes the signal with the model <ckpt>.
    Returns a tuple (signal, encoded_signal)
    '''
    signal = utils.load_audio(_file,
                              sample_length=sample_length,
                              sr=sample_rate)
    encoded_signal = fastgen.encode(signal, ckpt, sample_length)
    return signal, encoded_signal
Ejemplo n.º 12
0
def encode(path, filename):
    """Encode the audio at ``path`` and hand the result to ``decode``.

    Args:
        path: Path of the audio file to load.
        filename: Forwarded unchanged to ``decode``.
    """
    print('encoding..')
    sr = 16000
    model_path = '/home/paperspace/data/wavenet-ckpt/model.ckpt-200000'

    audio = utils.load_audio(path, sample_length=40000, sr=sr)
    # Use the length that was actually loaded for everything downstream.
    sample_length = audio.shape[0]
    seconds = sample_length / float(sr)
    print('{} samples, {} seconds'.format(sample_length, seconds))

    encoding = fastgen.encode(audio, model_path, sample_length)
    print(encoding.shape)
    print('finished encoding..')
    decode(encoding, path, filename, sample_length, model_path)
Ejemplo n.º 13
0
def encode(paths: List[str],
           sample_length: int = 16000,
           sample_rate: int = 16000,
           checkpoint: str = "checkpoints/wavenet-ckpt/model.ckpt-200000") \
    -> np.ndarray:
    """Load every path as audio and encode the whole batch in one call.

    Args:
        paths: Audio file paths to load.
        sample_length: Sample length passed to the loader and the encoder.
        sample_rate: Sample rate passed to the loader.
        checkpoint: Checkpoint path passed to ``fastgen.encode``.

    Returns:
        Whatever ``fastgen.encode`` returns for the stacked audio.
    """
    batch = np.array([
        utils.load_audio(p, sample_length=sample_length, sr=sample_rate)
        for p in paths
    ])
    return fastgen.encode(batch, checkpoint, sample_length)
Ejemplo n.º 14
0
def load_encoding(fname,
                  sample_length=None,
                  sr=16000,
                  ckpt='model.ckpt-200000'):
    '''Load a sound, print its size, encode it, and save the encoding.

    The .npy file is named after the last '/'-separated component of
    ``fname`` with '.npy' appended. Returns a tuple (audio, encoding).
    '''
    audio = utils.load_audio(fname, sample_length=sample_length, sr=sr)
    loaded_length = audio.shape[0]
    print('{} samples, {} seconds'.format(loaded_length,
                                          loaded_length / float(sr)))

    # Encode using the length that was actually loaded.
    encoding = fastgen.encode(audio, ckpt, loaded_length)
    print("(batch_size, time_steps, dimensions) :", encoding.shape)

    out_name = fname.split('/')[-1] + '.npy'
    np.save(out_name, encoding)
    return audio, encoding
Ejemplo n.º 15
0
def get_rb_vector(fname, sr=15360, window_size=16):
    """Build a stacked phase/magnitude matrix from an audio file's spectrogram.

    Args:
        fname: Path of the audio file to load.
        sr: Sample rate passed to the loader.
        window_size: Twice the number of leading rows dropped from the
            result (``window_size // 2`` rows are removed).

    Returns:
        Channel 1 (``dphase``) and channel 0 (``mag``) of the spectrogram
        concatenated along axis 0, with the first ``window_size // 2`` rows
        removed.
    """
    audio = utils.load_audio(fname, sample_length=-1, sr=sr)
    spec = utils.specgram(audio,
                          n_fft=512,
                          hop_length=None,
                          mask=True,
                          log_mag=True,
                          re_im=False,
                          dphase=True,
                          mag_only=False)
    mag = spec[:, :, 0]
    dphase = spec[:, :, 1]
    rb_vector = np.concatenate((dphase, mag), axis=0)
    # Floor division keeps the slice bound an int. `/` yields a float on
    # Python 3, which is not a valid slice index.
    return rb_vector[window_size // 2:, :]
Ejemplo n.º 16
0
def wavenet_encode(file_path):
    """Encode a single audio file with the WaveNet checkpoint.

    Args:
        file_path: Path of the audio file to encode.

    Returns:
        The output of ``fastgen.encode`` reshaped to ``(-1, 16)``.
    """
    # Model weights, and the sample rate the audio is loaded at.
    checkpoint_path = './wavenet-ckpt/model.ckpt-200000'
    neural_sample_rate = 16000

    samples = utils.load_audio(file_path,
                               sample_length=400000,
                               sr=neural_sample_rate)

    # Run the encoder half of the autoencoder over the full clip. Encoding a
    # batch of clips at once would be quicker than calling this per file.
    n_samples = len(samples)
    latents = fastgen.encode(samples, checkpoint_path, n_samples)

    # Collapse to a single sound.
    return latents.reshape((-1, 16))
Ejemplo n.º 17
0
def load_audio(wav_file, sample_length=64000):
    """Load one audio file as a single-row array of fixed width.

    Parameters
    ----------
    wav_file
        Audio file location passed to ``utils.load_audio``.
    sample_length : int, optional
        Width of the returned row. Longer audio is cut to this many samples;
        shorter audio is zero-padded on the right.

    Returns
    -------
    numpy.ndarray
        Array of shape ``(1, sample_length)``.
    """
    # Truncate first, then wrap in a list so the clip becomes a one-row batch.
    clip = np.array([utils.load_audio(wav_file)[:sample_length]])
    row = np.zeros((1, sample_length))
    row[0, :clip.shape[1]] = clip
    return row
Ejemplo n.º 18
0
def load_batch(files, sample_length=64000):
  """Load a batch of data from either .wav or .npy files.

  The extension of the first entry decides how every file is read. Entries
  shorter than the longest one are zero-padded along their first axis, then
  all entries are stacked along a new leading batch axis.

  Args:
    files: A non-empty list of filepaths to .wav or .npy files
    sample_length: Maximum sample length (only passed to the .wav loader)

  Returns:
    batch_data: A padded array of audio or embeddings [batch, length, (dims)]
  """
  batch_data = []
  max_length = 0
  is_npy = (os.path.splitext(files[0])[1] == ".npy")
  # Load the data
  for f in files:
    if is_npy:
      data = np.load(f)
    else:
      data = utils.load_audio(f, sample_length, sr=16000)
    batch_data.append(data)
    max_length = max(max_length, data.shape[0])
  # Add padding. The buffer keeps every trailing dimension of the entry, so
  # the same code serves 1-D audio and 2-D embeddings.
  for i, data in enumerate(batch_data):
    if data.shape[0] < max_length:
      padded = np.zeros((max_length,) + data.shape[1:])
      padded[:data.shape[0]] = data
      batch_data[i] = padded
  # Stack once, after padding, so padded and unpadded entries are treated
  # alike. Previously only unpadded entries received a leading axis, so
  # mixed-length batches and all 1-D audio input failed.
  return np.stack(batch_data)
Ejemplo n.º 19
0
def encode():
    """Encode './wav/mehldau-1.wav', save the encoding, and plot both.

    The encoding is saved next to the input as '<fname>.npy', and a
    two-panel figure (audio signal, encoding) is created.

    Returns:
        The encoding returned by ``fastgen.encode``.
    """
    fname = './wav/mehldau-1.wav'
    sr = 44100
    audio = utils.load_audio(fname, sample_length=44100, sr=sr)
    # Work with the length that was actually loaded.
    n_samples = audio.shape[0]
    print('{} samples, {} seconds'.format(n_samples, n_samples / float(sr)))

    encoding = fastgen.encode(audio, './wavenet-ckpt/model.ckpt-200000',
                              n_samples)
    print(encoding.shape)
    np.save(fname + '.npy', encoding)

    # Top panel: raw audio. Bottom panel: first encoding in the batch.
    _, (audio_ax, encoding_ax) = plt.subplots(2, 1, figsize=(10, 5))
    audio_ax.plot(audio)
    audio_ax.set_title('Audio Signal')
    encoding_ax.plot(encoding[0])
    encoding_ax.set_title('NSynth Encoding')
    return encoding
Ejemplo n.º 20
0
def Plot_SingleFile(file_name, sampleRate):
    """Encode one audio file, plot it with its encoding, and resynthesize it.

    Side effects: saves the encoding to ``file_name + '.npy'``, creates a
    two-panel figure, and synthesizes audio to ``'gen_' + file_name``.

    NOTE(review): the audio is loaded without an explicit ``sr`` while the
    printed duration divides by ``sampleRate`` - confirm the two agree.

    Args:
        file_name: Path of the audio file to process.
        sampleRate: Rate used only to compute the printed duration.
    """
    # sample_length caps how much audio is loaded.
    audio = utils.load_audio(file_name, sample_length=70000)
    n_samples = audio.shape[0]
    seconds = n_samples / float(sampleRate)
    print('{} samples, {} seconds'.format(n_samples, seconds))

    # Encode, report the shape, and keep a copy on disk.
    encoding = fastgen.encode(audio, 'model.ckpt-200000', n_samples)
    print(encoding.shape)
    np.save(file_name + '.npy', encoding)

    _, (audio_ax, encoding_ax) = plt.subplots(2, 1, figsize=(10, 5))
    audio_ax.plot(audio)
    audio_ax.set_title('Audio Signal')
    encoding_ax.plot(encoding[0])
    encoding_ax.set_title('NSynth Encoding')

    # Resynthesize from the encoding.
    fastgen.synthesize(encoding,
                       save_paths=['gen_' + file_name],
                       samples_per_save=n_samples)
def main(unused_argv=None):
  """Encode every .wav file under FLAGS.source_path and save the embeddings.

  The checkpoint is FLAGS.checkpoint_path when set, otherwise the latest
  checkpoint found in FLAGS.expdir. Wav files are processed in batches of
  FLAGS.batch_size; a short final batch is filled by repeating its last file
  so every batch has the same size, and the filler results are not saved.
  Each embedding is written into FLAGS.save_path as
  "<wav file name without extension>_embeddings.npy".

  Args:
    unused_argv: Ignored.
  """
  tf.logging.set_verbosity(FLAGS.log)

  if FLAGS.checkpoint_path:
    checkpoint_path = utils.shell_path(FLAGS.checkpoint_path)
  else:
    expdir = utils.shell_path(FLAGS.expdir)
    tf.logging.info("Will load latest checkpoint from %s.", expdir)
    # The body always exits, so this is a plain check rather than a loop.
    if not tf.gfile.Exists(expdir):
      tf.logging.fatal("\tExperiment save dir '%s' does not exist!", expdir)
      sys.exit(1)

    try:
      checkpoint_path = tf.train.latest_checkpoint(expdir)
    except tf.errors.NotFoundError:
      tf.logging.fatal("There was a problem determining the latest checkpoint.")
      sys.exit(1)

  # latest_checkpoint is documented to return None when nothing is found;
  # treat that like any other invalid path instead of passing None on.
  if checkpoint_path is None or not tf.train.checkpoint_exists(checkpoint_path):
    tf.logging.fatal("Invalid checkpoint path: %s", checkpoint_path)
    sys.exit(1)

  tf.logging.info("Will restore from checkpoint: %s", checkpoint_path)

  source_path = utils.shell_path(FLAGS.source_path)
  tf.logging.info("Will load Wavs from %s." % source_path)

  save_path = utils.shell_path(FLAGS.save_path)
  tf.logging.info("Will save embeddings to %s." % save_path)
  if not tf.gfile.Exists(save_path):
    tf.logging.info("Creating save directory...")
    tf.gfile.MakeDirs(save_path)

  sample_length = FLAGS.sample_length
  batch_size = FLAGS.batch_size

  def is_wav(f):
    return f.lower().endswith(".wav")

  wavfiles = sorted([
      os.path.join(source_path, fname)
      for fname in tf.gfile.ListDirectory(source_path) if is_wav(fname)
  ])

  # range and // behave the same on Python 2 and 3; xrange and int `/` do not.
  for start_file in range(0, len(wavfiles), batch_size):
    batch_number = (start_file // batch_size) + 1
    tf.logging.info("On file number %s (batch %d).", start_file, batch_number)
    end_file = start_file + batch_size
    wavefiles_batch = wavfiles[start_file:end_file]

    # Ensure that files has batch_size elements.
    batch_filler = batch_size - len(wavefiles_batch)
    wavefiles_batch.extend(batch_filler * [wavefiles_batch[-1]])
    wav_data = np.array(
        [utils.load_audio(f, sample_length) for f in wavefiles_batch])
    try:
      tf.reset_default_graph()
      # Load up the model for encoding and find the encoding
      encoding = encode(wav_data, checkpoint_path, sample_length=sample_length)
      if encoding.ndim == 2:
        encoding = np.expand_dims(encoding, 0)

      tf.logging.info("Encoding:")
      tf.logging.info(encoding.shape)
      tf.logging.info("Sample length: %d" % sample_length)

      for num, (wavfile, enc) in enumerate(zip(wavefiles_batch, encoding)):
        # Drop only the extension. str.strip(".wav") removes any of the
        # characters '.', 'w', 'a', 'v' from both ends, which corrupts names
        # such as "wave.wav".
        base_name = os.path.splitext(os.path.basename(wavfile))[0]
        filename = "%s_embeddings.npy" % base_name
        with tf.gfile.Open(os.path.join(save_path, filename), "w") as f:
          np.save(f, enc)

        # Stop before the filler entries appended above.
        if num + batch_filler + 1 == batch_size:
          break
    except Exception as e:
      tf.logging.info("Unexpected error happened: %s.", e)
      raise
Ejemplo n.º 22
0
 def load_audio(self):
     """Load ``self.fname`` with ``utils.load_audio`` into ``self.audio``.

     Uses ``self.sample_length`` and ``self.sr`` as the loader settings.
     """
     loaded = utils.load_audio(self.fname,
                               sample_length=self.sample_length,
                               sr=self.sr)
     self.audio = loaded
Ejemplo n.º 23
0
import os
import numpy as np
from magenta.models.nsynth import utils
from magenta.models.nsynth.wavenet import fastgen

# Script: load a clip, encode it, save the encoding, and resynthesize it.

# Input clip and the sample rate it is loaded at; sr * 4 samples are requested.
filename = '/data/input/battle1.wav'
sr = 44100
audio = utils.load_audio(filename, sample_length=(sr * 4), sr=sr)
# From here on, use the length that was actually loaded.
sample_length = audio.shape[0]

print('{} samples, {} seconds'.format(sample_length,
                                      sample_length / float(sr)))

encoding = fastgen.encode(audio, '/data/model/wavenet-ckpt/model.ckpt-200000',
                          sample_length)

print(encoding.shape)
# Saved next to the input, with '.wav' removed from the name.
np.save(filename.replace('.wav', '') + '_encoded.npy', encoding)

# Resynthesize from the encoding with the same checkpoint used for encoding.
fastgen.synthesize(
    encoding,
    save_paths=['/data/output/test.wav'],
    samples_per_save=sample_length,
    checkpoint_path="/data/model/wavenet-ckpt/model.ckpt-200000")
Ejemplo n.º 24
0
def load_encoding(fname, sample_length=None, sr=16000, ckpt=MODEL):
    """Load an audio file and encode it.

    Args:
        fname: Path of the audio file.
        sample_length: Sample length passed to the loader and the encoder.
        sr: Sample rate passed to the loader.
        ckpt: Checkpoint passed to the encoder; defaults to the module-level
            ``MODEL``.

    Returns:
        A tuple ``(audio, encoding)``.
    """
    samples = utils.load_audio(fname, sample_length=sample_length, sr=sr)
    latents = fastgen.encode(samples, ckpt, sample_length)
    return samples, latents
Ejemplo n.º 25
0
# Script: synthesize interpolations between saved wavetable encodings.

# Number of samples synthesized (and read back) per interpolation step.
sample_length = 512

# Encodings loaded from .npy files in the working directory.
encoding_sine = np.load('wavetable_sine.npy')
encoding_tri = np.load('wavetable_tri.npy')
encoding_saw = np.load('wavetable_saw.npy')

# Saw -> sine: steps i = 1..99, weighting saw by (100 - i) and sine by i.
for i in range(1, 100):
    filename = '../prerender/SawSin/SawSin_0.' + '%02d.txt' % i
    time0 = time.time()
    print('decoding saw+sine interpolation:' + '%02d' % i)
    # Synthesize the blended encoding into the scratch file 'tmp'.
    fastgen.synthesize((encoding_saw * (100 - i) + encoding_sine * i) / 100,
                       save_paths=['tmp'],
                       checkpoint_path='Model/wavenet-ckpt/model.ckpt-200000',
                       samples_per_save=sample_length)
    # Read the scratch file back and write it as one comma-separated row,
    # rounded to 5 decimals.
    audio = utils.load_audio('tmp', sample_length=512, sr=16000)
    np.savetxt(filename, [np.around(audio, decimals=5)],
               delimiter=',',
               fmt='%1.5f')
    # Elapsed seconds for this step.
    print(time.time() - time0)

# Sine -> triangle: same scheme as above.
# NOTE(review): this loop computes `filename` and `time0` but never writes
# the audio or prints the timing; the snippet looks cut off - confirm.
for i in range(1, 100):
    filename = '../prerender/SinTri/SinTri_0.' + '%02d.txt' % i
    time0 = time.time()
    print('decoding sine+tri interpolation:' + '%02d' % i)
    fastgen.synthesize((encoding_sine * (100 - i) + encoding_tri * i) / 100,
                       save_paths=['tmp'],
                       checkpoint_path='Model/wavenet-ckpt/model.ckpt-200000',
                       samples_per_save=sample_length)
    audio = utils.load_audio('tmp', sample_length=512, sr=16000)
Ejemplo n.º 26
0
def load_encoding(fname, sample_length=None, sr=16000, ckpt='../wavenet-ckpt/model.ckpt-200000'):
    """Load ``fname`` as audio and return it together with its encoding.

    Args:
        fname: Path of the audio file.
        sample_length: Sample length passed to the loader and the encoder.
        sr: Sample rate passed to the loader.
        ckpt: Checkpoint path passed to the encoder.

    Returns:
        A tuple ``(audio, encoding)``.
    """
    waveform = utils.load_audio(fname, sample_length=sample_length, sr=sr)
    return waveform, fastgen.encode(waveform, ckpt, sample_length)
Ejemplo n.º 27
0
import os
import numpy as np
from magenta.models.nsynth import utils
from magenta.models.nsynth.wavenet import fastgen

# Script: encode a clip, save the encoding, resynthesize it, and reload it.

fname = 'aggression.wav'
sr = 16000
audio = utils.load_audio(fname, sample_length=40000, sr=sr)
# From here on, use the length that was actually loaded.
sample_length = audio.shape[0]
print('{} samples, {} seconds'.format(sample_length,
                                      sample_length / float(sr)))

# The checkpoint is resolved to an absolute path from the working directory.
encoding = fastgen.encode(audio, os.path.abspath('model.ckpt-200000'),
                          sample_length)
print(encoding.shape)
np.save(fname + '.npy', encoding)

# Resynthesize from the encoding into 'gen_<fname>'.
fastgen.synthesize(encoding,
                   save_paths=['gen_' + fname],
                   samples_per_save=sample_length)

# Load the generated file back (sr is re-assigned to the same value).
sr = 16000
synthesis = utils.load_audio('gen_' + fname,
                             sample_length=sample_length,
                             sr=sr)

print('Magenta Test')
Ejemplo n.º 28
0
def load_encoding(fname):
    """Load ``fname`` as audio and encode it, printing progress.

    Reads the module-level names ``sl`` (sample length), ``sr`` (sample
    rate) and ``ckpt`` (checkpoint) instead of taking them as parameters.

    Args:
        fname: Path of the audio file.

    Returns:
        A tuple ``(audio, encoding)``.
    """
    audio = utils.load_audio(fname, sample_length=sl, sr=sr)
    # Single-argument print(...) gives the same output whether print is a
    # statement (Python 2) or a function (Python 3). The two spaces match the
    # old `print 'Encoding.. ', fname` output.
    print('Encoding..  {}'.format(fname))
    encoding = fastgen.encode(audio, ckpt, sl)
    print('Encoded successfully')
    return audio, encoding
def unused():
    """Notebook-style walkthrough of NSynth encoding experiments.

    Runs, in order: encode and resynthesize one clip; do the same for a
    second clip through a local ``load_encoding`` helper; time-stretch an
    encoding; average two encodings; crossfade two encodings. Everything is
    done for its side effects (.npy/.wav files written, figures created);
    nothing is returned.
    """
    # from https://www.freesound.org/people/MustardPlug/sounds/395058/
    fname = '395058__mustardplug__breakbeat-hiphop-a4-4bar-96bpm.wav'
    sr = 16000
    audio = utils.load_audio(fname, sample_length=40000, sr=sr)
    sample_length = audio.shape[0]
    print('{} samples, {} seconds'.format(sample_length, sample_length / float(sr)))

    encoding = fastgen.encode(audio, 'model.ckpt-200000', sample_length)
    print(encoding.shape)

    np.save(fname + '.npy', encoding)

    fig, axs = plt.subplots(2, 1, figsize=(10, 5))
    axs[0].plot(audio)
    axs[0].set_title('Audio Signal')
    axs[1].plot(encoding[0])
    axs[1].set_title('NSynth Encoding')

    fastgen.synthesize(encoding, save_paths=['gen_' + fname], samples_per_save=sample_length)

    sr = 16000
    synthesis = utils.load_audio('gen_' + fname, sample_length=sample_length, sr=sr)

    def load_encoding(fname, sample_length=None, sr=16000, ckpt='model.ckpt-200000'):
        """Load an audio file and return ``(audio, encoding)``."""
        audio = utils.load_audio(fname, sample_length=sample_length, sr=sr)
        encoding = fastgen.encode(audio, ckpt, sample_length)
        return audio, encoding

    # from https://www.freesound.org/people/maurolupo/sounds/213259/
    fname = '213259__maurolupo__girl-sings-laa.wav'
    sample_length = 32000
    audio, encoding = load_encoding(fname, sample_length)
    fastgen.synthesize(
        encoding,
        save_paths=['gen_' + fname],
        samples_per_save=sample_length)
    synthesis = utils.load_audio('gen_' + fname,
                                 sample_length=sample_length,
                                 sr=sr)

    # use image interpolation to stretch the encoding: (pip install scikit-image)
    from skimage.transform import resize

    def timestretch(encodings, factor):
        """Resize each encoding along its first axis by ``factor``."""
        # Normalize with the range of the argument itself, not with whatever
        # `encoding` happens to be bound to in the enclosing scope.
        min_encoding, max_encoding = encodings.min(), encodings.max()
        encodings_norm = (encodings - min_encoding) / (max_encoding - min_encoding)
        timestretches = []
        for encoding_i in encodings_norm:
            stretched = resize(encoding_i, (int(encoding_i.shape[0] * factor), encoding_i.shape[1]), mode='reflect')
            # Map back from the normalized range to the original range.
            stretched = (stretched * (max_encoding - min_encoding)) + min_encoding
            timestretches.append(stretched)
        return np.array(timestretches)

    # from https://www.freesound.org/people/MustardPlug/sounds/395058/
    fname = '395058__mustardplug__breakbeat-hiphop-a4-4bar-96bpm.wav'
    sample_length = 40000
    audio, encoding = load_encoding(fname, sample_length)

    encoding_slower = timestretch(encoding, 1.5)
    encoding_faster = timestretch(encoding, 0.5)

    fig, axs = plt.subplots(3, 1, figsize=(10, 7), sharex=True, sharey=True)
    axs[0].plot(encoding[0])
    axs[0].set_title('Encoding (Normal Speed)')
    axs[1].plot(encoding_faster[0])
    axs[1].set_title('Encoding (Faster))')
    axs[2].plot(encoding_slower[0])
    axs[2].set_title('Encoding (Slower)')

    fastgen.synthesize(encoding_faster, save_paths=['gen_faster_' + fname])
    fastgen.synthesize(encoding_slower, save_paths=['gen_slower_' + fname])

    # Load the slowed-down result only after it has been synthesized above.
    # Loading it earlier fails on a first run or picks up a stale file.
    audio = utils.load_audio('gen_slower_' + fname, sample_length=None, sr=sr)
    Audio(audio, rate=sr)

    sample_length = 80000

    # from https://www.freesound.org/people/MustardPlug/sounds/395058/
    aud1, enc1 = load_encoding('395058__mustardplug__breakbeat-hiphop-a4-4bar-96bpm.wav', sample_length)

    # from https://www.freesound.org/people/xserra/sounds/176098/
    aud2, enc2 = load_encoding('176098__xserra__cello-cant-dels-ocells.wav', sample_length)

    enc_mix = (enc1 + enc2) / 2.0

    fig, axs = plt.subplots(3, 1, figsize=(10, 7))
    axs[0].plot(enc1[0])
    axs[0].set_title('Encoding 1')
    axs[1].plot(enc2[0])
    axs[1].set_title('Encoding 2')
    axs[2].plot(enc_mix[0])
    axs[2].set_title('Average')

    # save_paths is a list everywhere else in this function; a bare string
    # would be indexed character by character.
    fastgen.synthesize(enc_mix, save_paths=['mix.wav'])

    def fade(encoding, mode='in'):
        """Scale ``encoding`` along axis 1 by a raised-cosine ramp.

        ``mode='in'`` ramps up from 0; any other mode uses the complementary
        ramp.
        """
        length = encoding.shape[1]
        fadein = (0.5 * (1.0 - np.cos(3.1415 * np.arange(length) /
                                      float(length)))).reshape(1, -1, 1)
        if mode == 'in':
            return fadein * encoding
        else:
            return (1.0 - fadein) * encoding

    fig, axs = plt.subplots(3, 1, figsize=(10, 7))
    axs[0].plot(enc1[0])
    axs[0].set_title('Original Encoding')
    axs[1].plot(fade(enc1, 'in')[0])
    axs[1].set_title('Fade In')
    axs[2].plot(fade(enc1, 'out')[0])
    axs[2].set_title('Fade Out')

    def crossfade(encoding1, encoding2):
        """Fade ``encoding1`` out while fading ``encoding2`` in, and sum."""
        return fade(encoding1, 'out') + fade(encoding2, 'in')

    fig, axs = plt.subplots(3, 1, figsize=(10, 7))
    axs[0].plot(enc1[0])
    axs[0].set_title('Encoding 1')
    axs[1].plot(enc2[0])
    axs[1].set_title('Encoding 2')
    axs[2].plot(crossfade(enc1, enc2)[0])
    axs[2].set_title('Crossfade')

    fastgen.synthesize(crossfade(enc1, enc2), save_paths=['crossfade.wav'])
Ejemplo n.º 30
0
def synthesize(source_file,
               checkpoint_path="model.ckpt-200000",
               out_file="synthesis.wav",
               sample_length=64000,
               samples_per_save=1000):
    """Resynthesize an input audio file.

    A .npy source is used directly as the encoding; a .wav source is loaded
    and encoded first. Audio is then generated one sample at a time and
    written to ``out_file``, both periodically and once at the end.

    Args:
      source_file: Location of a wave or .npy file to load.
      checkpoint_path: Location of the pretrained model. [model.ckpt-200000]
      out_file: Location to save the synthesized wave file. [synthesis.wav]
      sample_length: Sample length passed to the loader and encoder for .wav
        input; not used for .npy input. [64000]
      samples_per_save: Save a .wav after every amount of samples. [1000]

    Raises:
      RuntimeError: Source_file should be .wav or .npy.
    """
    if source_file.endswith(".npy"):
        # Precomputed encoding: the hop length comes from the model config.
        encoding = np.load(source_file)
        hop_length = Config().ae_hop_length
    elif source_file.endswith(".wav"):
        # Audio to resynthesize
        wav_data = utils.load_audio(source_file, sample_length, sr=16000)
        # Load up the model for encoding and find the encoding
        encoding, hop_length = encode(wav_data,
                                      checkpoint_path,
                                      sample_length=sample_length)
        # Drop the leading axis when the encoder returns a 3-D result.
        if encoding.ndim == 3:
            encoding = encoding[0]
    else:
        raise RuntimeError("File must be .wav or .npy")
    # Get lengths: each encoding frame accounts for hop_length output samples.
    encoding_length = encoding.shape[0]
    total_length = encoding_length * hop_length

    session_config = tf.ConfigProto(allow_soft_placement=True)
    with tf.Graph().as_default(), tf.Session(config=session_config) as sess:
        net = load_fastgen_nsynth()
        saver = tf.train.Saver()
        saver.restore(sess, checkpoint_path)

        # initialize queues w/ 0s
        sess.run(net["init_ops"])

        # Regenerate the audio file sample by sample
        wav_synth = np.zeros((total_length, ), dtype=np.float32)
        # The first step is fed a zero sample.
        audio = np.float32(0)

        for sample_i in range(total_length):
            # Encoding frame that covers this output sample.
            enc_i = sample_i // hop_length
            # Feed the previous sample and the current frame; push_ops is run
            # alongside the prediction and only the prediction is kept.
            pmf = sess.run([net["predictions"], net["push_ops"]],
                           feed_dict={
                               net["X"]: np.atleast_2d(audio),
                               net["encoding"]: encoding[enc_i]
                           })[0]
            sample_bin = sample_categorical(pmf)
            # NOTE(review): the -128 offset presumably recenters the bin index
            # before mu-law inversion - confirm against inv_mu_law_numpy.
            audio = utils.inv_mu_law_numpy(sample_bin - 128)
            wav_synth[sample_i] = audio
            if sample_i % 100 == 0:
                tf.logging.info("Sample: %d" % sample_i)
            # Periodic checkpoint of the partial waveform (also at sample 0).
            if sample_i % samples_per_save == 0:
                wavfile.write(out_file, 16000, wav_synth)

    # Final write with the complete waveform.
    wavfile.write(out_file, 16000, wav_synth)
Ejemplo n.º 31
0
                batch_counter = 0
                visual_batch = [None]*batch_size
                audio_batch = [None]*batch_size
                

# Script fragment: pair saved visual features with an audio spectrogram.
visual_feat_all = []
audio_feat_all = []
# NOTE(review): `xxx` limits the loop to its first iteration, which looks
# like a debugging leftover - confirm before relying on full coverage.
xxx = 0
for i in file_name:
    if xxx ==1:
        break
    xxx += 1
    # Visual features and the matching audio track are located by the same id.
    visual_feat = np.load(gan_folder + 'label_' + i + '.npy')
    fname = audio_folder + "GOPR" + i + '.MP4.wav'
    sr = 16000
    audio = utils.load_audio(fname, sample_length=-1, sr=sr)
    sample_length = audio.shape[0]
    spec = utils.specgram(audio,
                n_fft=512,
                hop_length=None,
                mask=True,
                log_mag=True,
                re_im=False,
                dphase=True,
                mag_only=False)
    # Channel 0 is taken as magnitude and channel 1 as phase derivative.
    mag = spec[:,:,0]
    dphase = spec[:,:,1]
    # Stack phase on top of magnitude along axis 0, then transpose.
    combine = np.concatenate((dphase, mag), axis=0)
    combineT = np.transpose(combine)

    # Keep axes 0 and 2 of the visual features; this only works when the
    # middle axis has length 1.
    visual_feat = visual_feat.reshape(visual_feat.shape[0],visual_feat.shape[2])
Ejemplo n.º 32
0
def load_wav(file_path):
    """Load ``file_path`` with ``utils.load_audio`` using its defaults."""
    samples = utils.load_audio(file_path)
    return samples
Ejemplo n.º 33
0
import tensorflow as tf, os, numpy as np, matplotlib.pyplot as plt, time
from magenta.models.nsynth import utils
from magenta.models.nsynth.wavenet import fastgen
from IPython.display import Audio
#%matplotlib inline
#%config InlineBackend.figure_format = 'jpg'

# Script: encode one clip, save and plot the encoding, then resynthesize it.

fname = "03 Plimsoll Punks.wav"
ckpt = "model.ckpt-200000"
sr = 16000

audio = utils.load_audio(fname, sample_length=16000, sr=sr)
# From here on, use the length that was actually loaded.
sample_length = audio.shape[0]

print ("{} samples , {} seconds".format(sample_length, sample_length/float(sr)))

encoding = fastgen.encode(audio, ckpt, sample_length)

print(encoding.shape)

# Saved under the part of the file name before its first '.'.
np.save(fname.split(".")[0] + ".npy", encoding)

# Top panel: raw audio. Bottom panel: first encoding in the batch.
fig, axs = plt.subplots(2, 1, figsize=(10, 5))
axs[0].plot(audio);
axs[0].set_title("Audio Signal")
axs[1].plot(encoding[0]);
axs[1].set_title("NSynth Encoding")

# Resynthesize from the encoding into "gen_<fname>".
fastgen.synthesize(encoding, save_paths=["gen_" + fname], samples_per_save=sample_length)