# Example #1
# Either train the model from scratch or restore a previously saved checkpoint.
if training:
    # NOTE(review): keyword set (n_epoch, snapshot_step, run_id) looks like the
    # tflearn DNN.fit API — confirm. snapshot_epoch=False + snapshot_step=1000
    # means checkpoints are written every 1000 steps rather than per epoch.
    model.fit(train_x, train_y, validation_set=((valid_x, valid_y)),
              show_metric=True, batch_size=batch_size, n_epoch=training_iters,
              snapshot_epoch=False, snapshot_step=1000, run_id=tf_id,
              callbacks=callback)
else:
    # cp is presumably the path of a saved checkpoint — verify against caller.
    model.load(cp)

# Autoregressive generation: seed the model with a random training window,
# then repeatedly predict the next frame and slide the window forward.
# NOTE(review): the rest of this file uses the Python 3 print() function, so
# the Python-2-only builtins used here originally (xrange, `print i, ...`,
# unicode) would be SyntaxError/NameError under Python 3 — replaced with
# range / print() / str. The redundant np.array() re-wrap of the vstack
# result (already an ndarray) is also dropped.
for i in range(amount_generated_sequences):
    # Pick a random seed window from the training inputs.
    random_index = random.randint(0, (len(train_x) - 1))

    impulse = np.array(train_x[random_index])
    predicted_magnitudes = impulse
    for j in range(sequence_max_length):
        # Model expects a batch of one window: (1, frames, bins) — shape
        # taken from x_frames, assumed to match train_x windows.
        impulse = np.array(impulse).reshape(1, x_frames.shape[1],
                                            x_frames.shape[2])
        prediction = model.predict(impulse)
        # Append the newly predicted frame, then keep only the most recent
        # sequence_length frames as the next input window.
        predicted_magnitudes = np.vstack((predicted_magnitudes, prediction))
        impulse = predicted_magnitudes[-sequence_length:]

    print(i, predicted_magnitudes.shape)
    # Generate synthetic phases for the predicted magnitude frames and
    # invert magnitudes + phases back to time-domain samples.
    phases = phase_gen.gen_phases(predicted_magnitudes.shape[0], fft_size,
                                  hop_size, sample_rate)
    audio = phase_gen.fft2samples(predicted_magnitudes, phases, hop_size)
    # Convert to int16 so the .wav is widely readable; normalize first since
    # librosa's writer only normalizes floats.
    maxv = np.iinfo(np.int16).max
    audio_wav = (librosa.util.normalize(audio) * maxv).astype(np.int16)
    audio_name = tf_id + u'_' + str(i) + u'.wav'
    librosa.output.write_wav(audio_path + u'/' + audio_name, audio_wav,
                             sample_rate, norm=False)
# Pull the arrays and scalar metadata out of the dataset container.
X = dataset['X'][:]
Y = dataset['Y'][:]
fftFrameSize = dataset['fftFrameSize'][()]
hop_length = dataset['hop_length'][()]
sample_rate = dataset['sample_rate'][()]

# The prediction sliding window: seed with the first input window, then
# stack every target frame after it along the time (frame) axis.
mags = np.concatenate((X[0], Y), axis=0)
print("Dataset Y Shape: ", Y.shape)
print("Mags Shape: ", mags.shape)

print('Synthesising Audio')
# Generate a phase value for every frame of predicted magnitudes using the
# phase_gen.py helper library.
phases = phase_gen.gen_phases(mags.shape[0], fftFrameSize, hop_length, sample_rate)
print('Generated New Phases', phases.shape)

# Convert the predicted magnitudes and generated phases back into
# time-domain samples with the correct hop length.
audio = phase_gen.fft2samples(mags, phases, hop_length)

# Scale to the int16 range so librosa does not save the .wav as 64-bit float
# (which few players can read); normalising here because librosa's write_wav
# only normalises floats, not ints.
maxv = np.iinfo(np.int16).max
audio_wav = (librosa.util.normalize(audio) * maxv).astype(np.int16)

#Create a unique name and directory for the new audiofiles and write it to wav.
# Create a unique name and directory for the new audio files.
# exist_ok=True replaces the exists()-then-makedirs() pattern, which raced:
# another process could create the directory between the check and the call.
os.makedirs('datasets/phased', exist_ok=True)

audio_name = dataset_name+'_'+'phased'+'_'+str(it_i)+'.wav'