def test_interpret_one_hot_on_A0(self):
    # Round trip: one-hot encode MIDI pitch 21, decode it again, and
    # check that the decoded pitch equals the pitch we started with.
    expected_pitch = 21
    encoded = encoder.one_hot_encode_midi_pitch(expected_pitch)

    print()
    decoded_pitch = encoder.interpret_one_hot(encoded, printing=True)
    print(
        f'midi_pitch_input: {expected_pitch:>3} midi_pitch_output: {decoded_pitch:>3}'
    )

    self.assertEqual(expected_pitch, decoded_pitch)
def test_interpret_one_hot_on_rest(self):
    # Round trip: one-hot encode the encoder's REST_MIDI_ENCODING value,
    # decode it again, and check that the same value comes back.
    expected_pitch = encoder.REST_MIDI_ENCODING
    encoded = encoder.one_hot_encode_midi_pitch(expected_pitch)

    print()
    decoded_pitch = encoder.interpret_one_hot(encoded, printing=True)
    print(
        f'midi_pitch_input: {expected_pitch:>3} midi_pitch_output: {decoded_pitch:>3}'
    )

    self.assertEqual(expected_pitch, decoded_pitch)
def test_interpret_one_hot_on_putting_lows_last(self):
    # Round trip with low_last=True passed to both the encoder and the
    # decoder: MIDI pitch 60 must survive encoding followed by decoding.
    expected_pitch = 60
    encoded = encoder.one_hot_encode_midi_pitch(expected_pitch, low_last=True)

    print()
    decoded_pitch = encoder.interpret_one_hot(encoded,
                                              low_last=True,
                                              printing=True)
    print(
        f'midi_pitch_input: {expected_pitch:>3} midi_pitch_output: {decoded_pitch:>3}'
    )

    self.assertEqual(expected_pitch, decoded_pitch)
def show_example_prediction(model_name,
                            i=0,
                            version=1,
                            x_val=None,
                            y_val=None,
                            printing_in_full=False):
    """Print a model's prediction for one validation instance next to its label.

    Args:
        model_name: Passed to ``load_model_and_get_predictions``.
        i: Index of the validation instance to show.
        version: Passed to ``load_data_arrays`` when validation data must be
            loaded here.
        x_val: Validation inputs; loaded via ``load_data_arrays`` if this or
            ``y_val`` is None.
        y_val: Validation targets; loaded via ``load_data_arrays`` if this or
            ``x_val`` is None.
        printing_in_full: When True, also print the raw model input, the raw
            prediction and the raw ground-truth arrays with their shapes.
    """
    # Both arrays are required; if either is missing, reload the pair.
    if x_val is None or y_val is None:
        _, _, x_val, y_val = load_data_arrays(version)

    features, example_ground_truth = x_val[i], y_val[i]

    # Present the single instance as a batch of one with a trailing axis.
    model_input = features.reshape(1, features.shape[0], 1)
    example_prediction = load_model_and_get_predictions(model_name,
                                                        model_input)[0]

    # Three views of prediction vs. ground truth: index of the largest
    # entry, decoded MIDI pitch, and note name.
    predicted_index = np.argmax(example_prediction)
    true_index = np.argmax(example_ground_truth)
    predicted_pitch = interpret_one_hot(example_prediction)
    true_pitch = interpret_one_hot(example_ground_truth)
    predicted_name = get_note_name(predicted_pitch)
    true_name = get_note_name(true_pitch)

    if printing_in_full:
        print(f'\nmodel_input: {model_input.shape}')
        print(model_input)
        print(f'\nexample_prediction: {example_prediction.shape}')
        print(example_prediction)
        print(f'\nexample_ground_truth: {example_ground_truth.shape}')
        print(example_ground_truth)

    print()
    print(f'val instance {i}')
    print(
        f' i_prediction: {predicted_index:>4} i_ground_truth: {true_index:>4}'
    )
    print(
        f'midi_pitch_prediction: {predicted_pitch:>4} midi_pitch_ground_truth: {true_pitch:>4}'
    )
    print(
        f' note_name_prediction: {predicted_name:>4} note_name_ground_truth: {true_name:>4}'
    )
def plot_wav_prediction(note,
                        example,
                        model_name,
                        method='scipy',
                        printing=False,
                        plotting_spectrogram=False,
                        showing=True):
    """Plot a model's per-window MIDI pitch predictions for a single-note wav file.

    The file read is ``wav_files/single_{note}_{example}.wav``.

    Args:
        note: Used to build the wav file name.
        example: Used to build the wav file name.
        model_name: Passed to ``load_model``; also shown in the plot title.
        method: ``'scipy'`` selects the ``'scipy'`` spectrogram strategy; any
            other value selects ``'pyplot'``.
        printing: When True, print the spectrogram shape and, for every
            window, the predicted pitch and the model's raw output.
        plotting_spectrogram: When True, obtain the spectrogram through
            ``plot_spectrogram`` (with ``showing=False``) instead of
            ``get_spectrogram``.
        showing: When True, call ``plt.show()`` after building the figure.
    """
    wav_file = f'wav_files/single_{note}_{example}.wav'
    model = load_model(model_name)

    # Only the literal 'scipy' picks the scipy strategy; everything else
    # falls back to pyplot.
    strategy = 'scipy' if method == 'scipy' else 'pyplot'
    if plotting_spectrogram:
        _, times, spectrogram = plot_spectrogram(wav_file,
                                                 strategy=strategy,
                                                 showing=False)
    else:
        _, times, spectrogram = get_spectrogram(wav_file, strategy=strategy)

    if printing:
        print(spectrogram.shape)

    # One prediction per spectrogram column.
    n_bins, n_windows = spectrogram.shape[0], spectrogram.shape[1]
    midi_pitch_predictions = np.zeros(n_windows)
    for window in range(n_windows):
        # Shape each column as a batch of one with a trailing axis.
        periodogram = spectrogram[:, window].reshape(1, n_bins, 1)
        pitch_probabilities = model.predict(periodogram)[0]
        midi_pitch_predictions[window] = interpret_one_hot(pitch_probabilities)
        if printing:
            print(midi_pitch_predictions[window])
            print(pitch_probabilities)

    plt.figure()
    plt.title(f'Prediction of \"{wav_file}\"\nby model \"{model_name}\"')
    plt.plot(times, midi_pitch_predictions)
    plt.xlabel('time (seconds)')
    plt.ylabel('MIDI pitch')
    plt.ylim(-2, 109)

    if showing:
        plt.show()
def predict_each_window_of_wav_file(file_name,
                                    wav_path='wav_files',
                                    adding_spectral_powers=True,
                                    normalising=True,
                                    window_size=25,
                                    model_name=None,
                                    printing=False):
    """Return the model's decoded prediction for every window of a wav file.

    Args:
        file_name: Wav file name without the ``.wav`` extension.
        wav_path: Directory containing the file; None means ``file_name`` is
            used on its own as the path stem.
        adding_spectral_powers: When True, run the periodograms through
            ``add_spectral_powers`` before prediction.
        normalising: When True, run the periodograms through ``normalise``.
        window_size: Passed to ``get_spectrogram``.
        model_name: Passed to ``load_model``.
        printing: When True, print the shape and contents of the result.

    Returns:
        A 1-D NumPy object array with one ``interpret_one_hot`` result per
        row of the model output.
    """
    wav_file_full_path = (f'{file_name}.wav' if wav_path is None else
                          f'{wav_path}/{file_name}.wav')

    # get the spectrogram and swap its axes so each row is one periodogram
    _, _, spectrogram = get_spectrogram(wav_file_full_path,
                                        window_size=window_size)
    periodograms = np.swapaxes(spectrogram, 0, 1)

    if adding_spectral_powers:
        periodograms = add_spectral_powers(periodograms)

    # append a trailing axis of length one
    n_windows, n_features = periodograms.shape[0], periodograms.shape[1]
    periodograms = periodograms.reshape((n_windows, n_features, 1))

    if normalising:
        periodograms = normalise(
            periodograms,
            spectral_powers_present=adding_spectral_powers,
            first_order_differences_present=False)

    # load the specified model and use it to predict the pitch at each window
    model = load_model(model_name)
    probabilities = model.predict(periodograms)

    # an object array is filled slot by slot so each decoded result is
    # stored as-is, whatever type interpret_one_hot returns
    predictions = np.empty(len(probabilities), dtype=object)
    for index, window_probabilities in enumerate(probabilities):
        predictions[index] = interpret_one_hot(window_probabilities,
                                               encoding=None)

    if printing:
        print(predictions.shape)
        print(predictions)

    return predictions
def create_comparison_text_file(file_name,
                                model_name,
                                window_size=50,
                                wav_path='wav_files',
                                xml_path='xml_files',
                                adding_spectral_powers=True,
                                normalising=True,
                                save_name=None,
                                printing=False):
    """Write ground-truth pitches and model predictions for a wav file to a text file.

    The output goes to ``txt_files/{save_name}.txt`` and has three rows: the
    time-step indices, the ground-truth pitch per time step, and the model's
    prediction per window. Every entry is left-aligned in a 5-character field.

    Args:
        file_name: Wav file name without the ``.wav`` extension; also passed
            to ``get_monophonic_ground_truth``.
        model_name: Passed to ``load_model``.
        window_size: Passed to both ``get_monophonic_ground_truth`` and
            ``get_spectrogram``.
        wav_path: Directory containing the wav file; None means ``file_name``
            is used on its own as the path stem.
        xml_path: Passed to ``get_monophonic_ground_truth``.
        adding_spectral_powers: When True, run the periodograms through
            ``add_spectral_powers`` before prediction.
        normalising: When True, run the periodograms through ``normalise``.
        save_name: Stem of the output text file; defaults to ``file_name``.
        printing: When True, print the spectrogram, the predictions and the
            ground truth (each preceded by its shape) after writing the file.
    """
    if save_name is None:
        save_name = file_name

    if wav_path is None:
        wav_file_full_path = f'{file_name}.wav'
    else:
        wav_file_full_path = f'{wav_path}/{file_name}.wav'

    # get the ground-truth pitch for the file
    ground_truth = get_monophonic_ground_truth(file_name,
                                               wav_path=wav_path,
                                               xml_path=xml_path,
                                               window_size=window_size)

    # get the spectrogram of the file and swap the axes to get an array of periodograms
    _, _, spectrogram = get_spectrogram(wav_file_full_path,
                                        window_size=window_size)
    periodograms = np.swapaxes(spectrogram, 0, 1)

    if adding_spectral_powers:
        periodograms = add_spectral_powers(periodograms)

    # append a trailing axis of length one
    periodograms = periodograms.reshape(
        (periodograms.shape[0], periodograms.shape[1], 1))

    if normalising:
        periodograms = normalise(
            periodograms,
            spectral_powers_present=adding_spectral_powers,
            first_order_differences_present=False)

    # load the specified model and use it to predict the pitch at each window
    model = load_model(model_name)
    probabilities = model.predict(periodograms)

    # an object array is filled slot by slot so each decoded result is
    # stored as-is, whatever type interpret_one_hot returns
    predictions = np.empty(len(probabilities), dtype=object)
    for index, window_probabilities in enumerate(probabilities):
        predictions[index] = interpret_one_hot(window_probabilities,
                                               encoding=None)

    # write the ground truth pitches and pitch predictions to a text file;
    # the context manager closes the file even if formatting an entry raises
    with open(f'txt_files/{save_name}.txt', 'w') as f:
        f.write(' time step: ')
        f.write(''.join(f'{time_step:<5}'
                        for time_step in range(len(ground_truth))))
        f.write('\n')
        f.write(' ground truth: ')
        f.write(''.join(f'{pitch:<5}' for pitch in ground_truth))
        f.write('\n')
        f.write('model predictions: ')
        f.write(''.join(f'{pitch:<5}' for pitch in predictions))

    if printing:
        print(spectrogram.shape)
        print(spectrogram)
        print()
        print(predictions.shape)
        print(predictions)
        print()
        print(ground_truth.shape)
        print(ground_truth)