예제 #1
0
# Read the STFT parameters stored as scalar datasets in the HDF5 file.
hop_length = dataset['hop_length'][()]
sample_rate = dataset['sample_rate'][()]

# The prediction sliding window will be the array that the model is made to
# predict new frames from: start from the first input sequence and append the
# full set of target frames.
mags = X[0]
mags = np.append(mags, Y, axis=0)
print("Dataset Y Shape: ", Y.shape)
print("Mags Shape: ", mags.shape)

print('Synthesising Audio')
# Generate a phase for every frame of predicted magnitudes using the
# phase_gen.py library.
phases = phase_gen.gen_phases(mags.shape[0], fftFrameSize, hop_length, sample_rate)
print('Generated New Phases', phases.shape)

# Convert all of the predicted magnitudes and generated phases back into
# samples with the correct hop length.
audio = phase_gen.fft2samples(mags, phases, hop_length)

# Scale to the int16 range: this stops librosa from saving the .wavs as
# 64-bit floats which nothing can read; normalising here because librosa's
# write-to-wav only normalises floats.
maxv = np.iinfo(np.int16).max
audio_wav = (librosa.util.normalize(audio) * maxv).astype(np.int16)

# Create a unique name and directory for the new audio files.
# Build the output directory path once instead of repeating the literal.
out_dir = os.path.join('datasets', 'phased')
if not os.path.exists(out_dir):
    os.makedirs(out_dir)

audio_name = dataset_name + '_' + 'phased' + '_' + str(it_i) + '.wav'

# Bump the iteration counter until the filename does not collide with an
# existing file. (Original used tab indentation here while the rest of the
# file uses 4 spaces — normalised to spaces.)
while os.path.exists(os.path.join(out_dir, audio_name)):
    it_i += 1
    audio_name = dataset_name + '_' + 'phased' + '_' + str(it_i) + '.wav'
예제 #2
0
# Either train the network, or restore previously saved weights from the
# checkpoint `cp` when not training.
if training:
    model.fit(train_x, train_y, validation_set=((valid_x, valid_y)),
              show_metric=True, batch_size=batch_size, n_epoch=training_iters,
              snapshot_epoch=False, snapshot_step=1000, run_id=tf_id,
              callbacks=callback)
else:
    model.load(cp)

# Generate `amount_generated_sequences` audio files, each seeded from a
# random training sequence. (Python 2 code: `xrange`, `print` statement,
# `unicode` are all py2-only.)
for i in xrange(amount_generated_sequences):
    # Pick a random training sequence as the seed window.
    random_index = random.randint(0, (len(train_x) - 1))

    impulse = np.array(train_x[random_index])
    predicted_magnitudes = impulse
    for j in xrange(sequence_max_length):
        # Reshape the rolling window to the (1, timesteps, features) shape
        # implied by x_frames, predict the next frame, append it, then slide
        # the window forward by keeping the last `sequence_length` frames.
        impulse = np.array(impulse).reshape(1, x_frames.shape[1],
                                            x_frames.shape[2])
        prediction = model.predict(impulse)
        predicted_magnitudes = np.vstack((predicted_magnitudes, prediction))
        impulse = predicted_magnitudes[-sequence_length:]

    predicted_magnitudes = np.array(predicted_magnitudes)
    print i, predicted_magnitudes.shape
    # Synthesise phases for the predicted magnitude frames and invert the
    # magnitude/phase pairs back to time-domain samples.
    phases = phase_gen.gen_phases(predicted_magnitudes.shape[0], fft_size,
                                  hop_size, sample_rate)
    audio = phase_gen.fft2samples(predicted_magnitudes, phases, hop_size)
    # Scale to the int16 range so the .wav is not written as 64-bit floats;
    # norm=False because the data is already normalised here.
    maxv = np.iinfo(np.int16).max
    audio_wav = (librosa.util.normalize(audio) * maxv).astype(np.int16)
    audio_name = tf_id + u'_' + unicode(i) + u'.wav'
    librosa.output.write_wav(audio_path + u'/' + audio_name, audio_wav,
                             sample_rate, norm=False)
예제 #3
0
            # For every frame of magnitudes...
            for i in range(magnitude_t.shape[0]):
                # ...append the frame to the fft bank.
                fft_bank.append(magnitude_t[i])

    # Convert the fft bank list into a numpy array.
    fft_bank = np.array(fft_bank)

    print('Synthesising Audio')
    # Generate a phase for every frame of predicted magnitudes using the
    # phase_gen.py library.
    phases = phase_gen.gen_phases(fft_bank.shape[0], fftFrameSize, hop_length,
                                  sample_rate)
    print('Generated New Phases', phases.shape)

    # Convert all of the predicted magnitudes and generated phases back into
    # samples with the correct hop length.
    audio = phase_gen.fft2samples(fft_bank, phases, hop_length)

    # Scale to the int16 range: this stops librosa from saving the .wavs as
    # 64-bit floats which nothing can read; normalising here because librosa's
    # write-to-wav only normalises floats.
    maxv = np.iinfo(np.int16).max
    audio_wav = (librosa.util.normalize(audio) * maxv).astype(np.int16)

    # Add one on to the user-defined sequence length — presumably so each
    # stored window also contains the next-frame target as its final element;
    # verify against how the sequences are split downstream.
    sequence_length = sequence_length + 1

    sequences = []
    # Slice the FFT bank into overlapping windows of `sequence_length` frames
    # (one window starting at every frame that has enough frames after it).
    for i in range(len(fft_bank) - sequence_length):
        sequences.append(fft_bank[i:i + sequence_length])

    sequences = np.array(sequences)