Example #1
def generate_audio():
    receptive_field_width_steps = 5

    batch_size = 1
    max_dilation = max(params.residual_conv_dilations)
    target_width = receptive_field_width_steps
    padded_input_width = receptive_field_width_steps + max_dilation

    # quantized signals generated by WaveNet
    generated_quantized_audio = np.zeros((padded_input_width, ),
                                         dtype=np.int32)

    for time_step in xrange(200):
        # quantized signals in receptive field
        padded_quantized_x_batch = generated_quantized_audio[
            -padded_input_width:].reshape((1, -1))

        # convert to image
        padded_x_batch = data.onehot_pixel_image(
            padded_quantized_x_batch, quantized_channels=params.audio_channels)

        # generate next signal
        softmax = wavenet.forward_one_step(padded_x_batch,
                                           softmax=True,
                                           return_numpy=True)
        softmax = softmax[0, :, 0, -1]
        generated_quantized_signal = np.random.choice(
            np.arange(params.audio_channels), p=softmax)
        generated_quantized_audio = np.append(generated_quantized_audio,
                                              [generated_quantized_signal],
                                              axis=0)

    print generated_quantized_audio
Example #2
def main():
    # compute required input width
    num_layers = len(params.residual_conv_channels)
    receptive_steps_per_unit = params.residual_conv_filter_width**num_layers
    receptive_steps = (receptive_steps_per_unit - 1) * params.residual_num_blocks + 1
    input_width = receptive_steps
    # padding for causal conv block
    input_width += len(params.causal_conv_channels)

    # quantized signals generated by WaveNet
    generated_quantized_audio = np.zeros((input_width, ), dtype=np.int32)

    for time_step in xrange(1000):
        # quantized signals in receptive field
        padded_quantized_x_batch = generated_quantized_audio[
            -input_width:].reshape((1, -1))

        # convert to image
        padded_x_batch = data.onehot_pixel_image(
            padded_quantized_x_batch,
            quantization_steps=params.quantization_steps)

        # generate next signal
        softmax = wavenet.forward_one_step(padded_x_batch,
                                           apply_softmax=True,
                                           as_numpy=True)
        softmax = softmax[0, :, 0, -1]
        generated_quantized_signal = np.random.choice(
            np.arange(params.quantization_steps), p=softmax)
        generated_quantized_audio = np.append(generated_quantized_audio,
                                              [generated_quantized_signal],
                                              axis=0)
        print generated_quantized_signal,
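
Examples #2 and #3 size the model input as (receptive_steps_per_unit - 1) * residual_num_blocks + 1, where receptive_steps_per_unit is the filter width raised to the number of residual layers, plus one extra step of padding per causal conv layer. A minimal sketch of that arithmetic with assumed parameter values (filter width 2, four residual layers per block, two blocks, two causal layers; these numbers stand in for params and are not the repo's defaults):

# assumed values, standing in for params.*
residual_conv_filter_width = 2   # dilated conv filter width
num_layers = 4                   # residual layers per block (dilations 1, 2, 4, 8)
residual_num_blocks = 2          # stacked residual blocks
num_causal_layers = 2            # causal conv layers in front

receptive_steps_per_unit = residual_conv_filter_width ** num_layers          # 2 ** 4 = 16
receptive_steps = (receptive_steps_per_unit - 1) * residual_num_blocks + 1   # 15 * 2 + 1 = 31
input_width = receptive_steps + num_causal_layers                            # 31 + 2 = 33
print(input_width)  # 33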
Example #3
def generate_audio():
	# compute receptive field width
	learnable_steps = 1
	batch_size = 1
	num_layers = len(params.residual_conv_channels)
	receptive_steps_per_unit = params.residual_conv_filter_width ** num_layers
	receptive_steps = (receptive_steps_per_unit - 1) * params.residual_num_blocks + 1
	target_width = learnable_steps
	input_width = receptive_steps
	# to compute all learnable targets
	input_width += learnable_steps - 1
	## padding for causal conv block
	input_width += len(params.causal_conv_channels)

	# quantized signals generated by WaveNet
	generated_quantized_audio = np.zeros((input_width, ), dtype=np.int32)

	start = time.time()
	for time_step in xrange(9):
		# quantized signals in receptive field
		padded_quantized_x_batch = generated_quantized_audio[-input_width:].reshape((1, -1))

		# convert to image
		padded_x_batch = data.onehot_pixel_image(padded_quantized_x_batch, quantization_steps=params.quantization_steps)

		# generate next signal
		softmax = wavenet.forward_one_step(padded_x_batch, softmax=True, return_numpy=True)
		softmax = softmax[0, :, 0, -1]
		generated_quantized_signal = np.argmax(softmax)
		generated_quantized_audio = np.append(generated_quantized_audio, [generated_quantized_signal], axis=0)
		print generated_quantized_signal,

	print generated_quantized_audio
	print time.time() - start

	wavenet.prev_causal_outputs = None
	wavenet.prev_residual_outputs_out = None
	wavenet.prev_residual_outputs_z = None
	generated_quantized_audio = np.zeros((input_width, ), dtype=np.int32)

	start = time.time()
	for time_step in xrange(9):
		# quantized signals in receptive field
		padded_quantized_x_batch = generated_quantized_audio[-input_width:].reshape((1, -1))

		# convert to image
		padded_x_batch = data.onehot_pixel_image(padded_quantized_x_batch, quantization_steps=params.quantization_steps)

		# generate next signal
		softmax = wavenet._forward_one_step(padded_x_batch, softmax=True, return_numpy=True)
		softmax = softmax[0, :, 0, -1]
		generated_quantized_signal = np.argmax(softmax)
		generated_quantized_audio = np.append(generated_quantized_audio, [generated_quantized_signal], axis=0)
		print generated_quantized_signal,

	print generated_quantized_audio
	print time.time() - start
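
Examples #3 and #6 pick the next signal greedily with np.argmax, while the other examples draw it from the softmax with np.random.choice(..., p=softmax). A toy sketch of the two selection rules on a made-up 8-way distribution (the probabilities are illustrative, not real model output):

import numpy as np

softmax = np.array([0.05, 0.10, 0.40, 0.20, 0.10, 0.05, 0.05, 0.05])

greedy_signal = np.argmax(softmax)                                      # always index 2
sampled_signal = np.random.choice(np.arange(len(softmax)), p=softmax)  # index 2 most often, others sometimes
print(greedy_signal)
print(sampled_signal)

Greedy decoding is deterministic and tends to produce repetitive audio, which is presumably why the full generation scripts sample from the distribution instead.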
Example #4
def generate_audio(sampling_rate=48000, generate_sec=1, remove_silence_frames=False):
	batch_size = 1

	# compute required input width
	num_layers = len(params.residual_conv_channels)
	receptive_steps_per_unit = params.residual_conv_filter_width ** num_layers
	receptive_steps = (receptive_steps_per_unit - 1) * params.residual_num_blocks + 1
	input_width = receptive_steps
	# padding for causal conv block
	input_width += len(params.causal_conv_channels)

	# quantized signals generated by WaveNet
	generated_quantized_audio = np.zeros((input_width, ), dtype=np.int32)

	start_time = time.time()
	for time_step in xrange(1, int(sampling_rate * generate_sec)):
		# quantized signals in receptive field
		padded_quantized_x_batch = generated_quantized_audio[-input_width:].reshape((1, -1))

		# convert to image
		padded_x_batch = data.onehot_pixel_image(padded_quantized_x_batch, quantization_steps=params.quantization_steps)

		# generate next signal
		if args.use_faster_wavenet:
			softmax = wavenet._forward_one_step(padded_x_batch, softmax=True, return_numpy=True)
		else:
			softmax = wavenet.forward_one_step(padded_x_batch, softmax=True, return_numpy=True)

		softmax = softmax[0, :, 0, -1]
		generated_quantized_signal = np.random.choice(np.arange(params.quantization_steps), p=softmax)

		if generated_quantized_signal == 0 and remove_silence_frames:
			pass
		else:
			generated_quantized_audio = np.append(generated_quantized_audio, [generated_quantized_signal], axis=0)

		if time_step % 10 == 0:
			sys.stdout.write("\rgenerating {:.2f} msec / {:.2f} msec".format(time_step * 1000.0 / sampling_rate, generate_sec * 1000.0))
			sys.stdout.flush()

	print "\ndone in {:.3f} sec".format(time.time() - start_time)

	# remove zero paddings
	generated_quantized_audio = generated_quantized_audio[input_width:]

	try:
		os.mkdir(args.generate_dir)
	except OSError:
		pass

	filename = "{}/generated.wav".format(args.generate_dir)
	data.save_audio_file(filename, generated_quantized_audio, params.quantization_steps, format="16bit_pcm", sampling_rate=sampling_rate)
Example #5
def generate_audio(sampling_rate=48000, generate_sec=1, remove_silence_frames=False):
	# compute required input width
	num_layers = len(params.residual_conv_channels)
	receptive_steps_per_unit = params.residual_conv_filter_width ** num_layers
	receptive_steps = (receptive_steps_per_unit - 1) * params.residual_num_blocks + 1
	input_width = receptive_steps
	# add paddings of causal conv block
	input_width += len(params.causal_conv_channels)

	# pad with silence signals
	generated_signals = np.full((input_width, ), 127, dtype=np.int32)

	start_time = time.time()
	for time_step in xrange(1, int(sampling_rate * generate_sec)):
		# signals in receptive field
		input_signals = generated_signals[-input_width:].reshape((1, -1))

		# convert to image
		input_signals = data.onehot_pixel_image(input_signals, quantization_steps=params.quantization_steps)

		# generate next signal
		if args.fast:
			softmax = wavenet._forward_one_step(input_signals, apply_softmax=True, as_numpy=True)
		else:
			softmax = wavenet.forward_one_step(input_signals, apply_softmax=True, as_numpy=True)

		softmax = softmax[0, :, 0, -1]
		signal = np.random.choice(np.arange(params.quantization_steps), p=softmax)

		if signal == 127 and remove_silence_frames:
			pass
		else:
			generated_signals = np.append(generated_signals, [signal], axis=0)

		if time_step % 10 == 0:
			sys.stdout.write("\rgenerating {:.2f} msec / {:.2f} msec".format(time_step * 1000.0 / sampling_rate, generate_sec * 1000.0))
			sys.stdout.flush()

	print "\ndone in {:.3f} sec".format(time.time() - start_time)

	# remove paddings
	generated_signals = generated_signals[input_width:]

	try:
		os.mkdir(args.output_dir)
	except OSError:
		pass

	filename = "{}/generated.wav".format(args.output_dir)
	data.save_audio_file(filename, generated_signals, params.quantization_steps, format="16bit_pcm", sampling_rate=sampling_rate)
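
data.save_audio_file itself is not shown in these examples. Below is a rough stand-in under the assumption that the quantized signals in [0, quantization_steps) only need to be rescaled to 16-bit PCM (the repo's version may additionally apply mu-law expansion, which this sketch skips), using scipy.io.wavfile as imported in Example #8:

import numpy as np
from scipy.io import wavfile

def save_quantized_as_wav(path, quantized_signals, quantization_steps=256, sampling_rate=48000):
	# map [0, quantization_steps - 1] to [-1, 1], then scale to 16-bit PCM
	normalized = quantized_signals.astype(np.float32) / (quantization_steps - 1) * 2.0 - 1.0
	pcm = (normalized * 32767).astype(np.int16)
	wavfile.write(path, sampling_rate, pcm)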
Example #6
def generate_audio():
	receptive_field_width_steps = 5

	batch_size = 1
	max_dilation = max(params.residual_conv_dilations)
	target_width = receptive_field_width_steps
	padded_input_width = receptive_field_width_steps + max_dilation

	# quantized signals generated by WaveNet
	generated_quantized_audio = np.mod(np.arange(1, padded_input_width + 1), 6).astype(np.int32)

	start = time.time()
	for time_step in xrange(500):
		# quantized signals in receptive field
		padded_quantized_x_batch = generated_quantized_audio[-padded_input_width:].reshape((1, -1))

		# convert to image
		padded_x_batch = data.onehot_pixel_image(padded_quantized_x_batch, quantized_channels=params.audio_channels)

		# generate next signal
		softmax = wavenet.forward_one_step(padded_x_batch, softmax=True, return_numpy=True)
		softmax = softmax[0, :, 0, -1]
		generated_quantized_signal = np.argmax(softmax)
		generated_quantized_audio = np.append(generated_quantized_audio, [generated_quantized_signal], axis=0)

	print generated_quantized_audio
	print time.time() - start

	wavenet.prev_causal_outputs = None
	wavenet.prev_residual_outputs_out = None
	wavenet.prev_residual_outputs_z = None
	generated_quantized_audio = np.mod(np.arange(1, padded_input_width + 1), 6).astype(np.int32)

	start = time.time()
	for time_step in xrange(500):
		# quantized signals in receptive field
		padded_quantized_x_batch = generated_quantized_audio[-padded_input_width:].reshape((1, -1))

		# convert to image
		padded_x_batch = data.onehot_pixel_image(padded_quantized_x_batch, quantized_channels=params.audio_channels)

		# generate next signal
		softmax = wavenet._forward_one_step(padded_x_batch, softmax=True, return_numpy=True)
		softmax = softmax[0, :, 0, -1]
		generated_quantized_signal = np.argmax(softmax)
		generated_quantized_audio = np.append(generated_quantized_audio, [generated_quantized_signal], axis=0)

	print generated_quantized_audio
	print time.time() - start
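
Examples #3 and #6 both clear the same three cached attributes before timing the faster _forward_one_step path. If that reset is needed in more than one place, it can be wrapped in a small helper; this is just a sketch, with the attribute names taken from the examples above:

def reset_generation_state(net):
	# drop the cached per-layer outputs so the next call starts a fresh sequence
	net.prev_causal_outputs = None
	net.prev_residual_outputs_out = None
	net.prev_residual_outputs_z = None

# usage: reset_generation_state(wavenet)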
Example #7
def generate_audio(receptive_field_width_ms=25, sampling_rate=48000, generate_duration_sec=1):
	# e.g. 48000 Hz * 0.25 s = 12000 time steps (= 250 ms receptive field)
	receptive_steps = int(sampling_rate * receptive_field_width_ms / 1000.0)

	# compute required input width
	batch_size = 1
	max_dilation = max(params.residual_conv_dilations)
	target_width = receptive_steps
	padded_input_width = receptive_steps + max_dilation * (params.residual_conv_kernel_width - 1)

	# quantized signals generated by WaveNet
	generated_quantized_audio = np.zeros((padded_input_width, ), dtype=np.int32)

	start_time = time.time()

	for time_step in xrange(1, int(sampling_rate * generate_duration_sec)):
		# quantized signals in receptive field
		padded_quantized_x_batch = generated_quantized_audio[-padded_input_width:].reshape((1, -1))

		# convert to image
		padded_x_batch = data.onehot_pixel_image(padded_quantized_x_batch, quantized_channels=params.audio_channels)

		# generate next signal
		if args.use_faster_wavenet:
			softmax = wavenet._forward_one_step(padded_x_batch, softmax=True, return_numpy=True)
		else:
			softmax = wavenet.forward_one_step(padded_x_batch, softmax=True, return_numpy=True)
		softmax = softmax[0, :, 0, -1]
		generated_quantized_signal = np.random.choice(np.arange(params.audio_channels), p=softmax)
		generated_quantized_audio = np.append(generated_quantized_audio, [generated_quantized_signal], axis=0)

		if time_step % 10 == 0:
			sys.stdout.write("\rgenerating {:.2f} msec / {:.2f} msec".format(time_step * 1000.0 / sampling_rate, generate_duration_sec * 1000.0))
			sys.stdout.flush()

	print "\ndone in {:.3f} sec".format(time.time() - start_time)

	# remove zero paddings
	generated_quantized_audio = generated_quantized_audio[padded_input_width:]

	try:
		os.mkdir(args.generate_dir)
	except OSError:
		pass

	filename = "{}/generated.wav".format(args.generate_dir)
	data.save_audio_file(filename, generated_quantized_audio, params.audio_channels, format="16bit_pcm", sampling_rate=sampling_rate)
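
Example #7 pads the receptive field by max_dilation * (kernel_width - 1) rather than by the filter_width ** num_layers formula used in Examples #2-#5. A small sketch of that padding arithmetic with an assumed dilation schedule (the values are illustrative, not the repo's configuration):

residual_conv_dilations = [1, 2, 4, 8, 16]   # assumed dilation per residual layer
residual_conv_kernel_width = 2               # assumed kernel width
receptive_steps = 1200                       # e.g. 25 ms at 48000 Hz

max_dilation = max(residual_conv_dilations)
padded_input_width = receptive_steps + max_dilation * (residual_conv_kernel_width - 1)
print(padded_input_width)  # 1200 + 16 * 1 = 1216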
Example #8
from scipy.io import wavfile
import numpy as np
import os, sys
sys.path.append(os.path.abspath(os.path.join(os.getcwd(), "../../")))
from args import args
from model import params, wavenet

input_size = 5
batchsize = 3
data = np.arange(1, batchsize * params.audio_channels * input_size + 1)
data = data.reshape((batchsize, params.audio_channels, 1, input_size)).astype(np.float32)
wavenet.forward_one_step(data)
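
Example #8 shows that forward_one_step expects an input of shape (batch_size, audio_channels, 1, input_width), which matches the layout the softmax output is indexed with ([0, :, 0, -1]) in the other examples. The internals of data.onehot_pixel_image are not shown; a plain-NumPy approximation of that one-hot layout might look like the following (an assumption about the encoding, not the repo's actual implementation):

import numpy as np

def onehot_pixel_image_sketch(quantized_batch, quantization_steps=256):
	# quantized_batch: int array of shape (batch_size, input_width)
	batch_size, input_width = quantized_batch.shape
	onehot = np.zeros((batch_size, quantization_steps, 1, input_width), dtype=np.float32)
	for b in range(batch_size):
		onehot[b, quantized_batch[b], 0, np.arange(input_width)] = 1.0
	return onehot

# usage: x = onehot_pixel_image_sketch(generated_quantized_audio[-input_width:].reshape((1, -1)))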