import numpy as np

# The helper functions (load_audio, forward_transform, amplitude_to_db,
# slice_magnitude, join_magnitude_slices, db_to_amplitude, inverse_transform,
# write_audio) and the constants DEFAULT_SAMPLING_RATE and IMG_DIM are assumed
# to be imported from the project's audio utilities.


def predict_multitarget(model, input_filename, style_filename, output_filename):
    # Load the melody and compute its magnitude spectrogram in dB.
    audio = load_audio(input_filename, sr=DEFAULT_SAMPLING_RATE)
    mag, phase = forward_transform(audio)
    mag_db = amplitude_to_db(mag)
    mag_sliced = slice_magnitude(mag_db, IMG_DIM[1])
    mag_sliced = (mag_sliced * 2) - 1  # Rescale from [0, 1] to [-1, 1]

    # Load the style reference and process it the same way.
    style = load_audio(style_filename, sr=DEFAULT_SAMPLING_RATE)
    style_mag, _ = forward_transform(style)
    style_mag_db = amplitude_to_db(style_mag)
    style_mag_sliced = slice_magnitude(style_mag_db, IMG_DIM[1])

    # Take a random slice of the style and repeat it for every melody slice.
    style_mag_sliced = style_mag_sliced[np.random.choice(style_mag_sliced.shape[0]), :, :]
    style_mag_sliced = (style_mag_sliced * 2) - 1
    style_mag_sliced = np.expand_dims(style_mag_sliced, axis=0)
    style_mag_sliced = np.repeat(style_mag_sliced, mag_sliced.shape[0], axis=0)

    # Concatenate [melody, style] along the channel axis and predict.
    input_data = np.concatenate([mag_sliced, style_mag_sliced], axis=3)
    prediction = model.predict(input_data)
    prediction = (prediction + 1) / 2  # Rescale back from [-1, 1] to [0, 1]

    # Reconstruct the waveform using the melody's original phase.
    mag_db = join_magnitude_slices(prediction, phase.shape)
    mag = db_to_amplitude(mag_db)
    audio_out = inverse_transform(mag, phase)
    write_audio(output_filename, audio_out)
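# For context, a minimal usage sketch, assuming a trained Keras/TensorFlow
# generator saved to disk; the model path and audio file names below are
# hypothetical:
#
#     from tensorflow.keras.models import load_model
#
#     model = load_model('generator.h5')  # hypothetical path
#     predict_multitarget(
#         model,
#         input_filename='melody.wav',
#         style_filename='style_reference.wav',
#         output_filename='stylized_output.wav',
#     )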
def predict(model, input_filename, output_filename):
    # Load the input and compute its magnitude spectrogram in dB.
    audio = load_audio(input_filename, sr=DEFAULT_SAMPLING_RATE)
    mag, phase = forward_transform(audio)
    mag_db = amplitude_to_db(mag)
    mag_sliced = slice_magnitude(mag_db, IMG_DIM[1])
    mag_sliced = (mag_sliced * 2) - 1  # Rescale from [0, 1] to [-1, 1]

    prediction = model.predict(mag_sliced)
    prediction = (prediction + 1) / 2  # Rescale back from [-1, 1] to [0, 1]

    # Reconstruct the waveform using the input's original phase.
    mag_db = join_magnitude_slices(prediction, phase.shape)
    mag = db_to_amplitude(mag_db)
    audio_out = inverse_transform(mag, phase)
    write_audio(output_filename, audio_out)
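# The slicing helpers are not shown in this file. As a rough sketch of the
# contract both predict functions appear to rely on (4-D arrays, given the
# axis=3 concatenation above), slice_magnitude splits a spectrogram into
# fixed-width windows with a trailing channel axis, and join_magnitude_slices
# undoes this. The sketch below is illustrative only, not the project's
# actual implementation:
#
#     def slice_magnitude_sketch(mag, width):
#         """Split a (freq, time) spectrogram into (n, freq, width, 1) windows,
#         zero-padding the time axis so it divides evenly."""
#         freq, time = mag.shape
#         n = int(np.ceil(time / width))
#         padded = np.zeros((freq, n * width), dtype=mag.dtype)
#         padded[:, :time] = mag
#         slices = np.stack(np.split(padded, n, axis=1), axis=0)
#         return slices[..., np.newaxis]
#
#     def join_magnitude_slices_sketch(slices, target_shape):
#         """Concatenate (n, freq, width, 1) windows back into a (freq, time)
#         spectrogram, trimming the padding to target_shape."""
#         joined = np.concatenate([s[..., 0] for s in slices], axis=1)
#         return joined[:target_shape[0], :target_shape[1]]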
def generate_audio(prediction, phase, output_name):
    # Note: unlike predict() above, this helper does not rescale the
    # prediction from [-1, 1] back to [0, 1]; callers are expected to
    # do that before calling.
    mag_db = join_magnitude_slices(prediction, phase.shape)
    mag = db_to_amplitude(mag_db)
    audio = inverse_transform(mag, phase)
    write_audio(output_name, audio)
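# Because generate_audio expects magnitudes already in [0, 1], a caller
# holding a raw [-1, 1]-range prediction (as produced by model.predict in
# the functions above) would rescale first. A sketch, with 'model',
# 'mag_sliced', and 'phase' assumed to be prepared as in predict():
#
#     prediction = model.predict(mag_sliced)
#     prediction = (prediction + 1) / 2  # undo the [-1, 1] normalization first
#     generate_audio(prediction, phase, 'rendered.wav')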