def predict_multitarget(model, input_filename, style_filename,
                        output_filename):
    """Run ``model`` on a melody conditioned on a style clip and save audio.

    Both files are loaded at ``DEFAULT_SAMPLING_RATE``, transformed with
    ``forward_transform``, converted to dB and cut into slices of width
    ``IMG_DIM[1]``. One style slice is picked at random, repeated once per
    melody slice, and concatenated with the melody slices along axis 3
    before being passed to ``model.predict``. The prediction is stitched
    back together, converted from dB, combined with the melody's phase and
    written to ``output_filename``.

    Args:
        model: Object exposing ``predict(array)``.
        input_filename: Path of the melody audio to transform.
        style_filename: Path of the audio supplying the style slice.
        output_filename: Path the resulting audio is written to.

    Note:
        The style slice is drawn with ``np.random.choice``, so the output
        depends on NumPy's global random state.
    """
    # --- Melody branch -----------------------------------------------------
    melody_wave = load_audio(input_filename, sr=DEFAULT_SAMPLING_RATE)
    melody_mag, melody_phase = forward_transform(melody_wave)
    melody_slices = slice_magnitude(amplitude_to_db(melody_mag), IMG_DIM[1])
    # Affine rescale x -> 2x - 1 (presumably [0, 1] -> [-1, 1]; TODO confirm
    # against amplitude_to_db's output range).
    melody_slices = (melody_slices * 2) - 1

    # --- Style branch (its phase is not needed) -----------------------------
    style_wave = load_audio(style_filename, sr=DEFAULT_SAMPLING_RATE)
    style_mag, _style_phase = forward_transform(style_wave)
    style_slices = slice_magnitude(amplitude_to_db(style_mag), IMG_DIM[1])

    # Pick a single style slice at random and rescale it like the melody.
    chosen_index = np.random.choice(style_slices.shape[0])
    style_slice = style_slices[chosen_index, :, :]
    style_slice = (style_slice * 2) - 1

    # Restore the leading axis, then repeat so there is one style slice per
    # melody slice.
    style_batch = np.repeat(
        np.expand_dims(style_slice, axis=0),
        melody_slices.shape[0],
        axis=0,
    )

    # --- Inference ----------------------------------------------------------
    # Stack [melody, style] along axis 3 (assumed to be the channel axis --
    # TODO confirm against slice_magnitude).
    network_input = np.concatenate((melody_slices, style_batch), axis=3)
    network_output = model.predict(network_input)
    # Undo the 2x - 1 rescaling applied to the inputs.
    network_output = (network_output + 1) / 2

    # --- Reconstruction -----------------------------------------------------
    restored_db = join_magnitude_slices(network_output, melody_phase.shape)
    restored_mag = db_to_amplitude(restored_db)
    write_audio(output_filename,
                inverse_transform(restored_mag, melody_phase))
# Example #2
# 0
def predict(model, input_filename, output_filename):
    """Run ``model`` on the spectrogram of an audio file and save the result.

    The input is loaded at ``DEFAULT_SAMPLING_RATE``, transformed with
    ``forward_transform``, converted to dB, cut into slices of width
    ``IMG_DIM[1]`` and rescaled with ``x * 2 - 1`` before being passed to
    ``model.predict``. The prediction is rescaled back, stitched to the shape
    of the original phase, converted from dB, recombined with that phase and
    written to ``output_filename``.

    Args:
        model: Object exposing ``predict(array)``.
        input_filename: Path of the audio file to transform.
        output_filename: Path the resulting audio is written to.
    """
    waveform = load_audio(input_filename, sr=DEFAULT_SAMPLING_RATE)
    magnitude, phase = forward_transform(waveform)

    slices = slice_magnitude(amplitude_to_db(magnitude), IMG_DIM[1])
    network_input = (slices * 2) - 1

    network_output = model.predict(network_input)
    # Inverse of the 2x - 1 rescaling applied to the input.
    rescaled_output = (network_output + 1) / 2

    restored_db = join_magnitude_slices(rescaled_output, phase.shape)
    restored_mag = db_to_amplitude(restored_db)
    write_audio(output_filename, inverse_transform(restored_mag, phase))
def generate_audio(prediction, phase, output_name):
    """Turn predicted magnitude slices into audio and write it out.

    The slices are joined to ``phase.shape``, converted from dB to
    amplitude, combined with ``phase`` through ``inverse_transform`` and
    written to ``output_name``.

    Args:
        prediction: Magnitude slices accepted by ``join_magnitude_slices``.
        phase: Phase data; must expose ``.shape``.
        output_name: Path the resulting audio is written to.
    """
    stitched_db = join_magnitude_slices(prediction, phase.shape)
    magnitude = db_to_amplitude(stitched_db)
    waveform = inverse_transform(magnitude, phase)
    write_audio(output_name, waveform)