Example #1
import os
import json

import librosa
import torch
from torch.autograd import Variable

# wavenet, load_model, predict_next and mu_law_decode are assumed to be
# project-local imports from the surrounding package.


def generate(model_path,
             model_name,
             generate_path,
             generate_name,
             start_piece=None,
             sr=16000,
             duration=10):
    if not os.path.exists(generate_path):
        os.makedirs(generate_path)
    with open('./params/wavenet_params.json', 'r') as f:
        params = json.load(f)
    net = wavenet(**params)
    net = load_model(net, model_path, model_name)
    if start_piece is None:
        # Seed with "silence": one-hot at the middle mu-law channel (128 of 256).
        start_piece = torch.zeros(1, 256, net.receptive_field)
        start_piece[:, 128, :] = 1.0
        start_piece = Variable(start_piece)
    note_num = duration * sr
    note = start_piece
    state_queue = None
    generated_piece = []
    for i in range(note_num):
        # Predict the next sample index, carrying incremental state between steps.
        note, state_queue = predict_next(net, note, state_queue)
        note = note[0]
        generated_piece.append(note)
        # Re-encode the predicted index as a one-hot input for the next step.
        temp = torch.zeros(1, net.quantization_channels, 1)
        temp[:, note, :] = 1.0
        note = Variable(temp)
    print(generated_piece)
    generated_piece = torch.LongTensor(generated_piece)
    generated_piece = mu_law_decode(generated_piece, net.quantization_channels)
    generated_piece = generated_piece.numpy()
    wav_name = os.path.join(generate_path, generate_name)
    librosa.output.write_wav(wav_name, generated_piece, sr=sr)
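
For reference, a minimal call might look like the following; every path and file name here is a hypothetical placeholder, not something defined by the snippet:

# Hypothetical usage; the checkpoint and output names are placeholders.
generate(model_path='./checkpoints/',
         model_name='wavenet_epoch_10.pkl',
         generate_path='./generated/',
         generate_name='sample.wav',
         sr=16000,
         duration=10)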
Example #2
import os
import json

import librosa
import torch
from torch.autograd import Variable

# wavenet_autoencoder, load_model, predict_next and mu_law_decode are
# assumed to be project-local imports from the surrounding package.


def generate(model_path,
             model_name,
             generate_path,
             generate_name,
             start_piece=None,
             sr=16000,
             duration=10):
    if not os.path.exists(generate_path):
        os.makedirs(generate_path)
    with open('./params/model_params.json') as f:
        model_params = json.load(f)
    net = wavenet_autoencoder(**model_params)
    net = load_model(net, model_path, model_name)
    cuda_available = torch.cuda.is_available()
    if cuda_available:
        net = net.cuda()
    if start_piece is None:
        # Seed with "silence": one-hot at the middle mu-law channel (128 of 256).
        start_piece = torch.zeros(1, 256, net.receptive_field + 512)
        start_piece[:, 128, :] = 1.0
        start_piece = Variable(start_piece)
    if cuda_available:
        start_piece = start_piece.cuda()
    note_num = duration * sr
    generated_piece = []
    input_wav = start_piece
    for i in range(note_num):
        print(i)
        predict_note = predict_next(net, input_wav)
        generated_piece.append(predict_note)
        # Re-encode the predicted index as a one-hot input for the next step.
        temp = torch.zeros(net.quantization_channel, 1)
        temp[predict_note] = 1
        temp = temp.view(1, net.quantization_channel, 1)
        note = Variable(temp)
        if cuda_available:
            note = note.cuda()
        # Slide the window along the time axis: drop the oldest sample, append
        # the new one, keeping receptive_field + 512 samples of context.
        input_wav = torch.cat(
            (input_wav[:, :, -(net.receptive_field + 511):], note), 2)
    print(generated_piece)
    generated_piece = torch.LongTensor(generated_piece)
    generated_piece = mu_law_decode(generated_piece, net.quantization_channel)
    generated_piece = generated_piece.numpy()
    wav_name = os.path.join(generate_path, generate_name)
    librosa.output.write_wav(wav_name, generated_piece, sr=sr)
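
The loop above depends on a project-local predict_next helper that is not shown. A minimal sketch of what it plausibly does, assuming net(input_wav) returns logits of shape (1, quantization_channel, time), is:

def predict_next(net, input_wav):
    # Plausible sketch only, not the project's actual helper.
    # Greedy decoding: take the most likely quantization channel
    # at the final time step of the network output.
    logits = net(input_wav)
    return int(logits[0, :, -1].argmax())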
Example #3
import os
import json

import librosa
import numpy as np
import torch
from torch.autograd import Variable

# WavenetAutoencoder, load_model, decode_next and mu_law_decode are
# assumed to be project-local imports from the surrounding package.


def decode(model_path, model_name, encoding, decoder_path, decoder_name,
           sr=16000, duration=10):
    """Synthesize audio from an array of embeddings.
    
    Args:
    encodings: Numpy array with shape [batch_size, time, dim].
    save_paths: Iterable of output file names.
    checkpoint_path: Location of the pretrained model. [model.ckpt-200000]
    samples_per_save: Save files after every amount of generated samples.

    """
    
    if not os.path.exists(decoder_path):
        os.makedirs(decoder_path)
    with open('./params/model_params.json') as f:
        model_params = json.load(f)
    net = WavenetAutoencoder(**model_params)
    net = load_model(net, model_path, model_name)
    cuda_available = torch.cuda.is_available()
    if cuda_available:
        net = net.cuda()
    net.eval()

    # Seed with "silence": one-hot at the middle mu-law channel (128 of 256).
    start_piece = torch.zeros(1, 256, net.receptive_field + 512)
    start_piece[:, 128, :] = 1.0
    start_piece = Variable(start_piece, volatile=True)
    
    # Load the saved encoding; each embedding frame conditions hop_size samples.
    encoding_ndarray = np.load(encoding)
    _, encoding_channels, encoding_length = encoding_ndarray.shape
    hop_size = model_params['en_pool_kernel_size']
    total_length = encoding_length * hop_size

    encoding = torch.from_numpy(encoding_ndarray).contiguous()
    encoding = Variable(encoding, volatile=True)
    
    if cuda_available:
        start_piece = start_piece.cuda()
        encoding = encoding.cuda()  # keep the conditioning on the same device
    generated_piece = []
    input_wav = start_piece
    # input_wav should be of shape (1, 256, net.receptive_field + 512).
    
    for i in range(total_length):
        print(i)
        # Each embedding frame conditions hop_size consecutive output samples.
        enc_i = encoding[:, :, i // hop_size].unsqueeze(2)
        predict_note = decode_next(net, input_wav, enc_i, hop_size)
        generated_piece.append(predict_note)
        # Re-encode the predicted index as a one-hot input for the next step.
        temp = torch.zeros(net.quantization_channel, 1)
        temp[predict_note] = 1
        temp = temp.view(1, net.quantization_channel, 1)
        note = Variable(temp)
        if cuda_available:
            note = note.cuda()
        # Slide the window along the time axis.
        input_wav = torch.cat(
            (input_wav[:, :, -(net.receptive_field + 511):], note), 2)
    print(generated_piece)
    generated_piece = torch.LongTensor(generated_piece)
    generated_piece = mu_law_decode(generated_piece, net.quantization_channel)
    generated_piece = generated_piece.numpy()
    wav_name = os.path.join(decoder_path, decoder_name)
    librosa.output.write_wav(wav_name, generated_piece, sr=sr)
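
A minimal invocation might look like this; the checkpoint, embedding, and output names are hypothetical placeholders:

# Hypothetical usage; all file names are placeholders.
decode(model_path='./checkpoints/',
       model_name='autoencoder_epoch_10.pkl',
       encoding='./encodings/sample_embedding.npy',
       decoder_path='./decoded/',
       decoder_name='reconstruction.wav')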
Example #4
import os
import json
import pickle

import librosa
import numpy as np
import torch
from torch.autograd import Variable

# wavenet_autoencoder, load_model, predict_next and mu_law_decode are
# assumed to be project-local imports from the surrounding package.


def generate(model_path,
             model_name,
             generate_path,
             generate_name,
             start_piece=None,
             sr=16000,
             duration=2):
    if not os.path.exists(generate_path):
        os.makedirs(generate_path)
    with open('./params/model_params.json') as f:
        model_params = json.load(f)
    net = wavenet_autoencoder(**model_params)
    net = load_model(net, model_path, model_name)
    cuda_available = torch.cuda.is_available()
    if cuda_available:
        net = net.cuda()
    if start_piece is None:
        # Seed from real audio: one-hot encode the last
        # receptive_field + 512 quantized samples of a stored recording.
        with open('../np_audio.pkl', 'rb') as fp:
            data = pickle.load(fp)
        data = np.array(data)[0]
        data = data[-net.receptive_field - 512:]
        rows = torch.from_numpy(data).long()
        cols = torch.arange(net.receptive_field + 512).long()
        start_piece = torch.zeros(256, net.receptive_field + 512)
        start_piece[rows, cols] = 1
        start_piece = start_piece.view(1, 256, net.receptive_field + 512)
        start_piece = Variable(start_piece)
    if cuda_available:
        start_piece = start_piece.cuda()
    note_num = duration * sr
    generated_piece = []
    input_wav = start_piece
    for i in range(note_num):
        print(i)
        predict_note = predict_next(net, input_wav)
        generated_piece.append(predict_note)
        # Re-encode the predicted index as a one-hot input for the next step.
        temp = torch.zeros(net.quantization_channel, 1)
        temp[predict_note] = 1
        temp = temp.view(1, net.quantization_channel, 1)
        note = Variable(temp)
        if cuda_available:
            note = note.cuda()
        # Slide the window along the time axis.
        input_wav = torch.cat(
            (input_wav[:, :, -(net.receptive_field + 511):], note), 2)
    print(generated_piece)
    generated_piece = torch.LongTensor(generated_piece)
    generated_piece = mu_law_decode(generated_piece, 256)
    generated_piece = generated_piece.numpy()
    with open(os.path.join(generate_path, 'generated_piece.pkl'), 'wb') as output:
        pickle.dump(generated_piece, output)
    wav_name = os.path.join(generate_path, generate_name)
    librosa.output.write_wav(wav_name, generated_piece, sr=sr)
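
All four snippets lean on a project-local mu_law_decode. A minimal sketch of the standard mu-law expansion it presumably implements (an assumption about the helper, not its actual source) is:

import torch

def mu_law_decode(indices, quantization_channels=256):
    # Map integer bins back to waveform values in [-1, 1], inverting
    # mu-law companding; assumes `indices` is a LongTensor of bin ids.
    mu = quantization_channels - 1
    signal = 2.0 * indices.float() / mu - 1.0               # bins -> [-1, 1]
    magnitude = (torch.pow(1.0 + mu, signal.abs()) - 1.0) / mu
    return signal.sign() * magnitude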