Example #1
def interpolate(recordedAudio, instrument):
    global voiceWeight, defaultInstrument

    sample_length = 80000

    instrumentType = defaultInstrument
    if instrument != "":
        instrumentType = instrument

    instrumentFile = "./sounds/recordings/instrument_recordings/" + instrumentType + ".wav"
    instrumentEncoding = "./sounds/encodings/instrument_encodings/" + instrumentType + ".npy"

    aud1, enc1 = load_encoding(recordedAudio, sample_length)
    #check if encoding already exists
    enc2 = None
    print("encoding path", instrumentEncoding)
    if(checkAudio(instrument + ".npy")):
        print("encoding exists")
        enc2 = np.load(instrumentEncoding)
    else:
        print("encoding does not exist")
        aud2, enc2 = load_encoding(instrumentFile, sample_length)
        np.save(instrumentEncoding, enc2)

    enc_mix = (1.5 * enc1 + enc2) / 2.0  # weight the voice encoding more heavily
    outputPath = './mixes/mix' + str(len(os.listdir('./mixes'))) + '.wav'
    fastgen.synthesize(enc_mix, checkpoint_path='../wavenet-ckpt/model.ckpt-200000', save_paths=[outputPath])
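This and several later examples call a load_encoding helper that the snippets never define; Example #26 below contains one. A minimal version is reproduced here for reference (the checkpoint path is a placeholder):

def load_encoding(fname, sample_length=None, sr=16000,
                  ckpt='model.ckpt-200000'):
    # Load the raw audio, then run it through the WaveNet encoder.
    audio = utils.load_audio(fname, sample_length=sample_length, sr=sr)
    encoding = fastgen.encode(audio, ckpt, sample_length)
    return audio, encoding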
Example #2
def synthesizer():

    print('Synthesizing with Sample rate: ' + str(sr) +
          ' and Sample length: ' + str(sl))

    for dirpath, dirnames, filenames in os.walk(inputpath):
        newdir = dirpath[len(inputpath):]
        newdir = newdir[1:]
        structure = os.path.join(outputpath, newdir)
        print(structure)
        if not os.path.isdir(structure):
            os.mkdir(structure)
            for fname in filenames:
                if fnmatch.fnmatch(fname, '*.wav'):
                    # print('encoding path: ', end='')
                    print(fname)
                    audio, encoding = load_encoding(
                        os.path.join(dirpath, fname))
                    # np.save(structure + fname + '.npy', encoding)
                    print('Synthesizing ' + fname + '.. ')
                    fastgen.synthesize(
                        encoding,
                        save_paths=[os.path.join(structure, fname)],
                        checkpoint_path=ckpt,
                        samples_per_save=sl)
                    print(fname + ': Done!')
        else:
            print("Folder: " + structure + " already exists!")
            return
Example #3
def decode(encoding, path, filename, sample_length, model_path):
    print('decoding..')
    outdir = '/home/paperspace/data/sounds_gen/'
    fastgen.synthesize(encoding,
                       save_paths=[outdir + filename],
                       checkpoint_path=model_path,
                       samples_per_save=sample_length)
    print('finished decoding..')
Example #4
def decode(self):
    fastgen.synthesize(
        self.encoding,
        save_paths=[self.fname],
        checkpoint_path=self.ckpt,
        samples_per_save=self.sample_length,
    )
    self.load_audio()
Example #5
def decode(fname, encoding, sample_length=44100, sr=16000):
    # Render the encoding to 'gen_<fname>', then load the result back in.
    fastgen.synthesize(encoding,
                       save_paths=['gen_' + fname],
                       samples_per_save=sample_length)
    synthesis = utils.load_audio('gen_' + fname,
                                 sample_length=sample_length,
                                 sr=sr)
    return synthesis
Example #6
def main(unused_argv=None):
    os.environ["CUDA_VISIBLE_DEVICES"] = str(FLAGS.gpu_number)
    source_path = utils.shell_path(FLAGS.source_path)
    checkpoint_path = utils.shell_path(FLAGS.checkpoint_path)
    save_path = utils.shell_path(FLAGS.save_path)
    if not save_path:
        raise RuntimeError("Must specify a save_path.")
    tf.logging.set_verbosity(FLAGS.log)

    # Generate from wav files
    if tf.gfile.IsDirectory(source_path):
        files = tf.gfile.ListDirectory(source_path)
        exts = [os.path.splitext(f)[1] for f in files]
        if ".wav" in exts:
            postfix = ".wav"
        elif ".npy" in exts:
            postfix = ".npy"
        else:
            raise RuntimeError("Folder must contain .wav or .npy files.")
        postfix = ".npy" if FLAGS.npy_only else postfix
        files = sorted([
            os.path.join(source_path, fname) for fname in files
            if fname.lower().endswith(postfix)
        ])

    elif source_path.lower().endswith((".wav", ".npy")):
        files = [source_path]
    else:
        files = []

    # Now synthesize from files one batch at a time
    batch_size = FLAGS.batch_size
    sample_length = FLAGS.sample_length
    n = len(files)
    for start in range(0, n, batch_size):
        end = start + batch_size
        batch_files = files[start:end]
        save_names = [
            os.path.join(
                save_path,
                "gen_" + os.path.splitext(os.path.basename(f))[0] + ".wav")
            for f in batch_files
        ]
        print('loading batch..')
        batch_data = fastgen.load_batch(batch_files,
                                        sample_length=sample_length)
        # Encode waveforms
        encodings = batch_data if postfix == ".npy" else fastgen.encode(
            batch_data, checkpoint_path, sample_length=sample_length)
        if FLAGS.gpu_number != 0:
            with tf.device("/device:GPU:%d" % FLAGS.gpu_number):
                fastgen.synthesize(encodings,
                                   save_names,
                                   checkpoint_path=checkpoint_path)
        else:
            fastgen.synthesize(encodings,
                               save_names,
                               checkpoint_path=checkpoint_path)
Example #7
def synthesize(encoding_mix: np.ndarray,
               checkpoint: str = "checkpoints/wavenet-ckpt/model.ckpt-200000"):
    os.makedirs(os.path.join("output", "synth"), exist_ok=True)
    date_and_time = time.strftime("%Y-%m-%d_%H%M%S")
    output = os.path.join("output", "synth", f"{date_and_time}.wav")
    encoding_mix = np.array([encoding_mix])
    fastgen.synthesize(encoding_mix,
                       checkpoint_path=checkpoint,
                       save_paths=[output])
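A usage sketch for the function above; enc1 and enc2 are hypothetical encodings produced by fastgen.encode. Note that the function adds the batch dimension itself, so a single (time, channels) encoding is passed in:

# Hypothetical usage: blend two single encodings and render the result.
synthesize((enc1[0] + enc2[0]) / 2.0)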
Example #8
def synthesizeEncodings(encoding1Name, encoding2Name, encoding1, encoding2, encoding1Weight, encoding2Weight):
    global voiceWeight, defaultInstrument

    sample_length = 80000
    voiceEncodingPath = "./sounds/encodings/voice_encodings/recording3_encoding.npy"
    instrumentEncodingPath = "./sounds/encodings/instrument_encodings/cello.npy"

    enc_mix = (encoding1Weight * encoding1 + encoding2Weight * encoding2) / 2.0
    outputFile = encoding1Name + "_" + str(encoding1Weight) + "_" + encoding2Name + "_" + str(encoding2Weight)
    outputPath = './mixes/weight_experiments/' + outputFile + '.wav'
    fastgen.synthesize(enc_mix, checkpoint_path='../wavenet-ckpt/model.ckpt-200000', save_paths=[outputPath])
Example #9
def main(unused_argv=None):
  os.environ["CUDA_VISIBLE_DEVICES"] = str(FLAGS.gpu_number)
  source_path = utils.shell_path(FLAGS.source_path)
  checkpoint_path = utils.shell_path(FLAGS.checkpoint_path)
  save_path = utils.shell_path(FLAGS.save_path)
  if not save_path:
    raise RuntimeError("Must specify a save_path.")
  tf.logging.set_verbosity(FLAGS.log)

  # Generate from wav files
  if tf.gfile.IsDirectory(source_path):
    files = tf.gfile.ListDirectory(source_path)
    exts = [os.path.splitext(f)[1] for f in files]
    if ".wav" in exts:
      postfix = ".wav"
    elif ".npy" in exts:
      postfix = ".npy"
    else:
      raise RuntimeError("Folder must contain .wav or .npy files.")
    postfix = ".npy" if FLAGS.npy_only else postfix
    files = sorted([
        os.path.join(source_path, fname)
        for fname in files
        if fname.lower().endswith(postfix)
    ])

  elif source_path.lower().endswith((".wav", ".npy")):
    files = [source_path]
  else:
    files = []

  # Now synthesize from files one batch at a time
  batch_size = FLAGS.batch_size
  sample_length = FLAGS.sample_length
  n = len(files)
  for start in range(0, n, batch_size):
    end = start + batch_size
    batch_files = files[start:end]
    save_names = [
        os.path.join(save_path,
                     "gen_" + os.path.splitext(os.path.basename(f))[0] + ".wav")
        for f in batch_files
    ]
    batch_data = fastgen.load_batch(batch_files, sample_length=sample_length)
    # Encode waveforms
    encodings = batch_data if postfix == ".npy" else fastgen.encode(
        batch_data, checkpoint_path, sample_length=sample_length)
    if FLAGS.gpu_number != 0:
      with tf.device("/device:GPU:%d" % FLAGS.gpu_number):
        fastgen.synthesize(
            encodings, save_names, checkpoint_path=checkpoint_path)
    else:
      fastgen.synthesize(encodings, save_names, checkpoint_path=checkpoint_path)
Example #10
def synthesize(encodings_mix: np.ndarray,
               encodings_mix_name: List[str],
               checkpoint: str = "checkpoints/wavenet-ckpt/model.ckpt-200000") \
    -> None:
  """
  Synthesizes the list of encodings and saves them under the list of names.
  This might take a long time on commodity hardware (~15 minutes).

  :param encodings_mix: the list of encodings to synth
  :param encodings_mix_name: the list of encodings names for the files
  :param checkpoint: the checkpoint folder
  """
  os.makedirs(os.path.join("output", "nsynth"), exist_ok=True)
  encodings_mix_name = [os.path.join("output", "nsynth",
                                     encoding_mix_name + ".wav")
                        for encoding_mix_name in encodings_mix_name]
  fastgen.synthesize(encodings_mix,
                     checkpoint_path=checkpoint,
                     save_paths=encodings_mix_name)
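A usage sketch for the function above, assuming enc1 and enc2 are same-shape encodings from fastgen.encode (hypothetical names):

# Hypothetical usage: average two encodings and synthesize the blend.
enc_mix = (enc1 + enc2) / 2.0
synthesize(enc_mix, ["voice_cello_mix"])  # saved as output/nsynth/voice_cello_mix.wav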
Example #11
def interpolate():
    sample_length = 80000
    # from https://www.freesound.org/people/MustardPlug/sounds/395058/
    aud1, enc1 = load_encoding(
        '395058__mustardplug__breakbeat-hiphop-a4-4bar-96bpm.wav',
        sample_length)
    # from https://www.freesound.org/people/xserra/sounds/176098/
    aud2, enc2 = load_encoding('176098__xserra__cello-cant-dels-ocells.wav',
                               sample_length)
    enc_mix = (enc1 + enc2) / 2.0

    fig, axs = plt.subplots(3, 1, figsize=(10, 7))
    axs[0].plot(enc1[0])
    axs[0].set_title('Encoding 1')
    axs[1].plot(enc2[0])
    axs[1].set_title('Encoding 2')
    axs[2].plot(enc_mix[0])
    axs[2].set_title('Average')

    fastgen.synthesize(enc_mix, save_paths=['mix.wav'])
Example #12
def merge_sounds(audio_list, skip_existing=True):
    # Output duration = sample_length / sample_rate, about 3 seconds
    sample_length = 40000
    sample_rate = 13300
    audio_1 = audio_list[0]
    audio_2 = audio_list[1]
    audio_name_1 = audio_1.split(SEPARATOR)[-1].split('.')[0]
    audio_name_2 = audio_2.split(SEPARATOR)[-1].split('.')[0]
    output_name = ''.join(sorted([audio_name_1, audio_name_2]))
    output_path = '{}{}{}.wav'.format(OUTPUT_DIR, SEPARATOR, output_name)

    if (os.path.exists(output_path) and skip_existing):
        print('Skipping sounds "{}" and "{}"'.format(audio_1, audio_2))
        return output_path

    print('Merging sounds "{}" and "{}"'.format(audio_1, audio_2))

    sample_length = 35000
    try:
        print("Loading Audio_1")
        aud1, enc1 = load_encoding(audio_1,
                                   sample_length=sample_length,
                                   sr=sample_rate)
        print("Loading Audio_2")
        aud2, enc2 = load_encoding(audio_2,
                                   sample_length=sample_length,
                                   sr=sample_rate)

        enc_mix = (enc1 + enc2) / 2.0

        print("Synthesizing new audio: {}".format(output_name))
        fastgen.synthesize(enc_mix,
                           checkpoint_path=MODEL,
                           save_paths=[output_path])
    except Exception as e:
        print('Error, skipping combo: {},\nError: {}'.format(
            str(output_name), str(e)))

    return output_path
Example #13
def Plot_SingleFile(file_name, sampleRate):
    audio = utils.load_audio(
        file_name,
        sample_length=70000)  # sample_length controls the clip duration
    sample_length = audio.shape[0]
    print('{} samples, {} seconds'.format(sample_length,
                                          sample_length / float(sampleRate)))

    #Encoding for new sound part.
    encoding = fastgen.encode(audio, 'model.ckpt-200000', sample_length)
    print(encoding.shape)
    np.save(file_name + '.npy', encoding)

    fig, axs = plt.subplots(2, 1, figsize=(10, 5))
    axs[0].plot(audio)
    axs[0].set_title('Audio Signal')
    axs[1].plot(encoding[0])
    axs[1].set_title('NSynth Encoding')

    #synthesis
    fastgen.synthesize(encoding,
                       save_paths=['gen_' + file_name],
                       samples_per_save=sample_length)
Example #14
def main(unused_argv=None):
    source_path = utils.shell_path(FLAGS.source_path)
    checkpoint_path = utils.shell_path(FLAGS.checkpoint_path)
    save_path = utils.shell_path(FLAGS.save_path)
    if not save_path:
        raise RuntimeError("Must specify a save_path.")
    tf.logging.set_verbosity(FLAGS.log)

    # Generate from wav files
    if tf.gfile.IsDirectory(source_path):
        files = tf.gfile.ListDirectory(source_path)
        exts = [os.path.splitext(f)[1] for f in files]
        if ".wav" in exts:
            postfix = ".wav"
        elif ".npy" in exts:
            postfix = ".npy"
        else:
            raise RuntimeError("Folder must contain .wav or .npy files.")
        postfix = ".npy" if FLAGS.encodings else postfix
        files = sorted([
            os.path.join(source_path, fname) for fname in files
            if fname.lower().endswith(postfix)
        ])

    elif source_path.lower().endswith((".wav", ".npy")):
        files = [source_path]
    else:
        files = []
    for f in files:
        out_file = os.path.join(
            save_path,
            "gen_" + os.path.splitext(os.path.basename(f))[0] + ".wav")
        tf.logging.info("OUTFILE %s" % out_file)
        synthesize(source_file=f,
                   checkpoint_path=checkpoint_path,
                   out_file=out_file,
                   sample_length=FLAGS.sample_length)
Example #15
def decoding(fname, sample_length, sr, encoding):
    fastgen.synthesize(
        encoding,
        save_paths=['gen_' + fname[fname.rfind('/') + 1:]],
        samples_per_save=sample_length)
Example #16
def main(unused_argv=None):
    os.environ["CUDA_VISIBLE_DEVICES"] = str(FLAGS.gpu_number)
    source_path = utils.shell_path(FLAGS.source_path)
    checkpoint_path = utils.shell_path(FLAGS.checkpoint_path)
    save_path = utils.shell_path(FLAGS.save_path)
    if not save_path:
        raise ValueError("Must specify a save_path.")
    tf.logging.set_verbosity(FLAGS.log)

    # Use directory of files
    if tf.gfile.IsDirectory(source_path):
        files = tf.gfile.ListDirectory(source_path)
        file_extensions = [os.path.splitext(f)[1] for f in files]
        if ".wav" in file_extensions:
            file_extension = ".wav"
        elif ".npy" in file_extensions:
            file_extension = ".npy"
        else:
            raise RuntimeError("Folder must contain .wav or .npy files.")
        file_extension = ".npy" if FLAGS.npy_only else file_extension
        files = sorted([
            os.path.join(source_path, fname) for fname in files
            if fname.lower().endswith(file_extension)
        ])
    # Use a single file
    elif source_path.lower().endswith((".wav", ".npy")):
        file_extension = os.path.splitext(source_path.lower())[1]
        files = [source_path]
    else:
        raise ValueError(
            "source_path {} must be a folder or file.".format(source_path))

    # Now synthesize from files one batch at a time
    batch_size = FLAGS.batch_size
    sample_length = FLAGS.sample_length
    n = len(files)
    for start in range(0, n, batch_size):
        end = start + batch_size
        batch_files = files[start:end]
        save_names = [
            os.path.join(
                save_path,
                "gen_" + os.path.splitext(os.path.basename(f))[0] + ".wav")
            for f in batch_files
        ]
        # Encode waveforms
        if file_extension == ".wav":
            batch_data = fastgen.load_batch_audio(batch_files,
                                                  sample_length=sample_length)
            encodings = fastgen.encode(batch_data,
                                       checkpoint_path,
                                       sample_length=sample_length)
        # Or load encodings
        else:
            encodings = fastgen.load_batch_encodings(
                batch_files, sample_length=sample_length)
        # Synthesize multi-gpu
        if FLAGS.gpu_number != 0:
            with tf.device("/device:GPU:%d" % FLAGS.gpu_number):
                fastgen.synthesize(encodings,
                                   save_names,
                                   checkpoint_path=checkpoint_path)
        # Single gpu
        else:
            fastgen.synthesize(encodings,
                               save_names,
                               checkpoint_path=checkpoint_path)
Example #17
def encodeAndDecode(recordingName, recordingPath):
    aud1, enc1 = load_encoding(recordingPath, 80000)
    encodeOutputPath = './sounds/encodings/voice_encodings/' + recordingName + '_encoding' + str(len(os.listdir('./sounds/encodings/voice_encodings')))
    decodeOutputPath = './mixes/single_voice_decoding/' + recordingName + '_decoding' + str(len(os.listdir('./mixes/single_voice_decoding'))) + '.wav'
    np.save(encodeOutputPath, enc1)
    fastgen.synthesize(enc1, checkpoint_path='../wavenet-ckpt/model.ckpt-200000', save_paths=[decodeOutputPath])
Example #18
def main(unused_argv=None):
  os.environ["CUDA_VISIBLE_DEVICES"] = str(FLAGS.gpu_number)
  source_path = utils.shell_path(FLAGS.source_path)
  checkpoint_path = utils.shell_path(FLAGS.checkpoint_path)
  save_path = utils.shell_path(FLAGS.save_path)
  if not save_path:
    raise ValueError("Must specify a save_path.")
  tf.logging.set_verbosity(FLAGS.log)

  # Use directory of files
  if tf.gfile.IsDirectory(source_path):
    files = tf.gfile.ListDirectory(source_path)
    file_extensions = [os.path.splitext(f)[1] for f in files]
    if ".wav" in file_extensions:
      file_extension = ".wav"
    elif ".npy" in file_extensions:
      file_extension = ".npy"
    else:
      raise RuntimeError("Folder must contain .wav or .npy files.")
    file_extension = ".npy" if FLAGS.npy_only else file_extension
    files = sorted([
        os.path.join(source_path, fname)
        for fname in files
        if fname.lower().endswith(file_extension)
    ])
  # Use a single file
  elif source_path.lower().endswith((".wav", ".npy")):
    file_extension = os.path.splitext(source_path.lower())[1]
    files = [source_path]
  else:
    raise ValueError(
        "source_path {} must be a folder or file.".format(source_path))

  # Now synthesize from files one batch at a time
  batch_size = FLAGS.batch_size
  sample_length = FLAGS.sample_length
  n = len(files)
  for start in range(0, n, batch_size):
    end = start + batch_size
    batch_files = files[start:end]
    save_names = [
        os.path.join(save_path,
                     "gen_" + os.path.splitext(os.path.basename(f))[0] + ".wav")
        for f in batch_files
    ]
    # Encode waveforms
    if file_extension == ".wav":
      batch_data = fastgen.load_batch_audio(
          batch_files, sample_length=sample_length)
      encodings = fastgen.encode(
          batch_data, checkpoint_path, sample_length=sample_length)
    # Or load encodings
    else:
      encodings = fastgen.load_batch_encodings(
          batch_files, sample_length=sample_length)
    # Synthesize multi-gpu
    if FLAGS.gpu_number != 0:
      with tf.device("/device:GPU:%d" % FLAGS.gpu_number):
        fastgen.synthesize(
            encodings, save_names, checkpoint_path=checkpoint_path)
    # Single gpu
    else:
      fastgen.synthesize(
          encodings, save_names, checkpoint_path=checkpoint_path)
Example #19
import os
import numpy as np
from magenta.models.nsynth import utils
from magenta.models.nsynth.wavenet import fastgen

filename = '/data/input/battle1.wav'
sr = 44100
audio = utils.load_audio(filename, sample_length=(sr * 4), sr=sr)
sample_length = audio.shape[0]

print('{} samples, {} seconds'.format(sample_length,
                                      sample_length / float(sr)))

encoding = fastgen.encode(audio, '/data/model/wavenet-ckpt/model.ckpt-200000',
                          sample_length)

print(encoding.shape)
np.save(filename.replace('.wav', '') + '_encoded.npy', encoding)

fastgen.synthesize(
    encoding,
    save_paths=['/data/output/test.wav'],
    samples_per_save=sample_length,
    checkpoint_path="/data/model/wavenet-ckpt/model.ckpt-200000")
Example #20
st.write('Encoding took ' + str((end - start)) + ' seconds')
st.write('Encoding shape ' + str(encoding.shape))

# Save encoding
np.save(filenames[0] + '.npy', encoding)

# Plot PCM and encoding
fig, axs = plt.subplots(2, 1, figsize=(10, 5))
axs[0].plot(x1)
axs[0].set_title('Audio Signal')
axs[1].plot(encoding[0])
axs[1].set_title('NSynth Encoding')
st.pyplot()

# Decoding
start = time.time()
fastgen.synthesize(encoding,
                   checkpoint_path=model_dir,
                   save_paths=['gen_' + filenames[0]],
                   samples_per_save=sample_length)
end = time.time()
st.write('Decoding took ' + str((end - start)) + ' seconds')

# Evaluate reconstruction
x1_gen, _ = Load(output_dir, 'gen_' + filenames[0], sr=sr)
fig, ax = plt.subplots(figsize=(10, 5))
ax.plot(x1_gen)
ax.set_title('Reconstructed Audio Signal')
st.pyplot()
Example #21
fname = "03 Plimsoll Punks.wav"
ckpt = "model.ckpt-200000"
sr = 16000

audio = utils.load_audio(fname, sample_length=16000, sr=sr)
sample_length = audio.shape[0]

print ("{} samples , {} seconds".format(sample_length, sample_length/float(sr)))

encoding = fastgen.encode(audio, ckpt, sample_length)

print(encoding.shape)

np.save(fname.split(".")[0] + ".npy", encoding)

fig, axs = plt.subplots(2, 1, figsize=(10, 5))
axs[0].plot(audio);
axs[0].set_title("Audio Signal")
axs[1].plot(encoding[0]);
axs[1].set_title("NSynth Encoding")

# Verify that audio can be generated quickly from the encoding
fastgen.synthesize(encoding, save_paths=["gen_" + fname], samples_per_save=sample_length)

sr = 16000

# Output file. Listen to it to hear what neural synthesis does. Note this uses 8-bit mu-law,
# so the sound quality is limited; a better resolution will be used later.
# Be patient: this takes at least 15 minutes to terminate.
synthesis = utils.load_audio("gen_" + fname, sample_length=sample_length, sr=sr)
Example #22
def decoding(fname, sample_length, sr, encoding):
    fastgen.synthesize(encoding,
                       save_paths=['gen_' + fname[fname.rfind('/') + 1:]],
                       samples_per_save=sample_length)
Example #23
audio, encoding = load_encoding(_file, sample_length, sample_rate, _model)
np.save(without_extension(_file) + '.npy', encoding)
print("(batch_size, time_steps, dimensions) :", encoding.shape)

# plotting #
if PLOT:
    fig, axs = plt.subplots(2, 1, figsize=(10, 5))
    axs[0].plot(audio)
    axs[0].set_title('Audio Signal')
    axs[1].plot(encoding[0])
    axs[1].set_title('NSynth Encoding')

# decoding #
# Synthesize audio from the encoding and save it
fastgen.synthesize(
    encoding,
    save_paths=[without_extension(_file) + "_decoded." + get_extension(_file)],
    samples_per_save=sample_length)

if DEBUG:
    print("Generation for normal encoding achieved !")

# slower and faster encoding #
encoding_slower = timestretch(encoding, 1.5)
encoding_faster = timestretch(encoding, 0.5)

if PLOT:
    fig, axs = plt.subplots(3, 1, figsize=(10, 7), sharex=True, sharey=True)
    axs[0].plot(encoding[0])
    axs[0].set_title('Encoding (Normal Speed)')
    axs[1].plot(encoding_faster[0])
    axs[1].set_title('Encoding (Faster)')
Example #24
def synth():
    fastgen.synthesize(xfade_encoding, checkpoint_path=model_dir,
                       save_paths=['enc_' + fade_type + '_' + FirstSong_fname + \
                                   SecondSong_fname],
                       samples_per_save=sample_length)
    return None
Example #25
import os
import time

import numpy as np
from scipy import signal

from magenta.models.nsynth import utils
from magenta.models.nsynth.wavenet import fastgen

# Hide GPUs so synthesis runs on the CPU (assigning a bare Python variable
# named CUDA_VISIBLE_DEVICES has no effect on TensorFlow).
os.environ["CUDA_VISIBLE_DEVICES"] = ""
sample_length = 512

encoding_sine = np.load('wavetable_sine.npy')
encoding_tri = np.load('wavetable_tri.npy')
encoding_saw = np.load('wavetable_saw.npy')

#sawsin
for i in range(1, 100):
    filename = '../prerender/SawSin/SawSin_0.' + '%02d.txt' % i
    time0 = time.time()
    print('decoding saw+sine interpolation:' + '%02d' % i)
    fastgen.synthesize((encoding_saw * (100 - i) + encoding_sine * i) / 100,
                       save_paths=['tmp'],
                       checkpoint_path='Model/wavenet-ckpt/model.ckpt-200000',
                       samples_per_save=sample_length)
    audio = utils.load_audio('tmp', sample_length=512, sr=16000)
    np.savetxt(filename, [np.around(audio, decimals=5)],
               delimiter=',',
               fmt='%1.5f')
    print(time.time() - time0)

#sintri
for i in range(1, 100):
    filename = '../prerender/SinTri/SinTri_0.' + '%02d.txt' % i
    time0 = time.time()
    print('decoding sine+tri interpolation:' + '%02d' % i)
    fastgen.synthesize((encoding_sine * (100 - i) + encoding_tri * i) / 100,
                       save_paths=['tmp'],
                       checkpoint_path='Model/wavenet-ckpt/model.ckpt-200000',
                       samples_per_save=sample_length)
    audio = utils.load_audio('tmp', sample_length=512, sr=16000)
    np.savetxt(filename, [np.around(audio, decimals=5)],
               delimiter=',',
               fmt='%1.5f')
    print(time.time() - time0)
Example #26
def unused():
    # from https://www.freesound.org/people/MustardPlug/sounds/395058/
    fname = '395058__mustardplug__breakbeat-hiphop-a4-4bar-96bpm.wav'
    sr = 16000
    audio = utils.load_audio(fname, sample_length=40000, sr=sr)
    sample_length = audio.shape[0]
    print('{} samples, {} seconds'.format(sample_length, sample_length / float(sr)))


    encoding = fastgen.encode(audio, 'model.ckpt-200000', sample_length)
    print(encoding.shape)

    np.save(fname + '.npy', encoding)

    fig, axs = plt.subplots(2, 1, figsize=(10, 5))
    axs[0].plot(audio);
    axs[0].set_title('Audio Signal')
    axs[1].plot(encoding[0]);
    axs[1].set_title('NSynth Encoding')

    fastgen.synthesize(encoding, save_paths=['gen_' + fname], samples_per_save=sample_length)

    sr = 16000
    synthesis = utils.load_audio('gen_' + fname, sample_length=sample_length, sr=sr)

    def load_encoding(fname, sample_length=None, sr=16000, ckpt='model.ckpt-200000'):
        audio = utils.load_audio(fname, sample_length=sample_length, sr=sr)
        encoding = fastgen.encode(audio, ckpt, sample_length)
        return audio, encoding

    # from https://www.freesound.org/people/maurolupo/sounds/213259/
    fname = '213259__maurolupo__girl-sings-laa.wav'
    sample_length = 32000
    audio, encoding = load_encoding(fname, sample_length)
    fastgen.synthesize(
        encoding,
        save_paths=['gen_' + fname],
        samples_per_save=sample_length)
    synthesis = utils.load_audio('gen_' + fname,
                                 sample_length=sample_length,
                                 sr=sr)

    # use image interpolation to stretch the encoding: (pip install scikit-image)
    from skimage.transform import resize

    def timestretch(encodings, factor):
        min_encoding, max_encoding = encodings.min(), encodings.max()
        encodings_norm = (encodings - min_encoding) / (max_encoding - min_encoding)
        timestretches = []
        for encoding_i in encodings_norm:
            stretched = resize(encoding_i, (int(encoding_i.shape[0] * factor), encoding_i.shape[1]), mode='reflect')
            stretched = (stretched * (max_encoding - min_encoding)) + min_encoding
            timestretches.append(stretched)
        return np.array(timestretches)

    # from https://www.freesound.org/people/MustardPlug/sounds/395058/
    fname = '395058__mustardplug__breakbeat-hiphop-a4-4bar-96bpm.wav'
    sample_length = 40000
    audio, encoding = load_encoding(fname, sample_length)

    encoding_slower = timestretch(encoding, 1.5)
    encoding_faster = timestretch(encoding, 0.5)

    fig, axs = plt.subplots(3, 1, figsize=(10, 7), sharex=True, sharey=True)
    axs[0].plot(encoding[0]);
    axs[0].set_title('Encoding (Normal Speed)')
    axs[1].plot(encoding_faster[0]);
    axs[1].set_title('Encoding (Faster)')
    axs[2].plot(encoding_slower[0]);
    axs[2].set_title('Encoding (Slower)')


    fastgen.synthesize(encoding_faster, save_paths=['gen_faster_' + fname])
    fastgen.synthesize(encoding_slower, save_paths=['gen_slower_' + fname])

    # Load the stretched result once it has been synthesized.
    audio = utils.load_audio('gen_slower_' + fname, sample_length=None, sr=sr)
    Audio(audio, rate=sr)

    sample_length = 80000

    # from https://www.freesound.org/people/MustardPlug/sounds/395058/
    aud1, enc1 = load_encoding('395058__mustardplug__breakbeat-hiphop-a4-4bar-96bpm.wav', sample_length)

    # from https://www.freesound.org/people/xserra/sounds/176098/
    aud2, enc2 = load_encoding('176098__xserra__cello-cant-dels-ocells.wav', sample_length)

    enc_mix = (enc1 + enc2) / 2.0

    fig, axs = plt.subplots(3, 1, figsize=(10, 7))
    axs[0].plot(enc1[0]);
    axs[0].set_title('Encoding 1')
    axs[1].plot(enc2[0]);
    axs[1].set_title('Encoding 2')
    axs[2].plot(enc_mix[0]);
    axs[2].set_title('Average')

    fastgen.synthesize(enc_mix, save_paths=['mix.wav'])

    def fade(encoding, mode='in'):
        length = encoding.shape[1]
        fadein = (0.5 * (1.0 - np.cos(np.pi * np.arange(length) /
                                      float(length)))).reshape(1, -1, 1)
        if mode == 'in':
            return fadein * encoding
        else:
            return (1.0 - fadein) * encoding

    fig, axs = plt.subplots(3, 1, figsize=(10, 7))
    axs[0].plot(enc1[0]);
    axs[0].set_title('Original Encoding')
    axs[1].plot(fade(enc1, 'in')[0]);
    axs[1].set_title('Fade In')
    axs[2].plot(fade(enc1, 'out')[0]);
    axs[2].set_title('Fade Out')

    def crossfade(encoding1, encoding2):
        return fade(encoding1, 'out') + fade(encoding2, 'in')


    fig, axs = plt.subplots(3, 1, figsize=(10, 7))
    axs[0].plot(enc1[0]);
    axs[0].set_title('Encoding 1')
    axs[1].plot(enc2[0]);
    axs[1].set_title('Encoding 2')
    axs[2].plot(crossfade(enc1, enc2)[0]);
    axs[2].set_title('Crossfade')

    fastgen.synthesize(crossfade(enc1, enc2), save_paths=['crossfade.wav'])
Example #27
def crossfade():
    return fastgen.synthesize(_crossfade(enc1, enc2),
                              save_paths=['crossfade.wav'])
Example #28
  plt.title('NSynth Encoding')
except Exception as e:
  print(e)

"""# Synthesize

On the GPU, this should take about 4 minutes per second of audio per batch.
"""

#@title Synthesize Interpolations
print('Total Iterations to Complete: %d\n' % SAMPLE_LENGTH)

encodings = np.array(z_list)
save_paths = ['/content/' + name + '.wav' for name in name_list]
fastgen.synthesize(encodings,
                   save_paths=save_paths,
                   checkpoint_path=ckpt_path,
                   samples_per_save=int(SAMPLE_LENGTH / 10))

#@title Download Interpolations
for fname in save_paths:
  print('Downloading: %s' % fname.split('/')[-1])
  download(fname)

#@title Listen to the outputs

print("Originals:\n")
for fname in file_list:
  synth_audio = utils.load_audio(fname,
                                 sample_length=SAMPLE_LENGTH,
                                 sr=SR)
  print(get_name(fname))
Example #29
def Combine_Synth(file1, file2):
    sample_length = 20000  # Duration
    aud1, enc1 = load_encoding(file1, sample_length)
    aud2, enc2 = load_encoding(file2, sample_length)
    fastgen.synthesize(crossfade(enc1, enc2), save_paths=['crossfade.wav'])
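Example #29 (like #27) assumes fade and crossfade helpers that it never defines; the versions from Example #26 are reproduced here so the snippet is self-contained:

def fade(encoding, mode='in'):
    # Half-cosine ramp over the time axis of the encoding.
    length = encoding.shape[1]
    fadein = (0.5 * (1.0 - np.cos(np.pi * np.arange(length) /
                                  float(length)))).reshape(1, -1, 1)
    if mode == 'in':
        return fadein * encoding
    return (1.0 - fadein) * encoding


def crossfade(encoding1, encoding2):
    # Fade the first encoding out while fading the second in.
    return fade(encoding1, 'out') + fade(encoding2, 'in')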