def reshape_split_data_for_rnn(x_train, y_train, x_val, y_val, maximum_number_of_windows, target_encoding='label',
                               printing=False):
    """Pad every file of a train/validation split to one common length for the RNN.

    Each output row is laid out as: one BoF window, the file's own windows,
    then EoF filler up to ``maximum_number_of_windows + 2`` windows in total.

    Args:
        x_train: per-file feature arrays. The trailing axis of each file is
            dropped by a reshape, so each is presumably (windows, bins, 1) --
            TODO confirm against the caller.
        y_train: per-file target arrays, indexed in step with ``x_train``.
        x_val: validation counterpart of ``x_train``.
        y_val: validation counterpart of ``y_train``.
        maximum_number_of_windows: longest file length to allow for, NOT
            counting the BoF/EoF markers (two are added here).
        target_encoding: when ``'midi_pitch'`` or ``'label'`` the target arrays
            are cast to ``int``; otherwise they stay floating point.
        printing: if true, print a summary and call ``print_split_data``.

    Returns:
        ``(x_train_reshaped, y_train_reshaped, x_val_reshaped, y_val_reshaped)``;
        the x arrays have shape (files, windows + 2, bins) and the y arrays
        (files, windows + 2, 1).

    Note:
        The BoF/EoF target values are always requested in the ``'label'``
        encoding, whatever ``target_encoding`` is.
    """
    # two extra windows: the obligatory BoF and EoF markers
    maximum_number_of_windows += 2
    number_of_bins = x_train[0].shape[1]

    def blank(samples, depth):
        # one zero-filled (files, windows, depth) buffer per input collection
        return np.zeros(shape=(len(samples), maximum_number_of_windows, depth))

    x_train_reshaped = blank(x_train, number_of_bins)
    y_train_reshaped = blank(y_train, 1)
    x_val_reshaped = blank(x_val, number_of_bins)
    y_val_reshaped = blank(y_val, 1)

    if target_encoding in ('midi_pitch', 'label'):
        y_train_reshaped = y_train_reshaped.astype(int)
        y_val_reshaped = y_val_reshaped.astype(int)

    bof_label = encode_ground_truth_array(np.array(['BoF']), desired_encoding='label')[0]
    eof_label = encode_ground_truth_array(np.array(['EoF']), desired_encoding='label')[0]

    # start with EoF everywhere, so whatever is not overwritten below is padding
    x_train_reshaped[:, :] = get_eof_artificial_periodogram(number_of_bins)
    y_train_reshaped[:, :] = eof_label
    x_val_reshaped[:, :] = get_eof_artificial_periodogram(number_of_bins)
    y_val_reshaped[:, :] = eof_label

    # window 0 of every file is the BoF marker
    x_train_reshaped[:, 0] = get_bof_artificial_periodogram(number_of_bins)
    y_train_reshaped[:, 0] = bof_label
    x_val_reshaped[:, 0] = get_bof_artificial_periodogram(number_of_bins)
    y_val_reshaped[:, 0] = bof_label

    # copy each training file's windows in, starting right after the BoF window
    for i, sample in enumerate(x_train):
        x_train_i = np.array(sample).reshape(sample.shape[:-1])
        stop = 1 + len(x_train_i)
        x_train_reshaped[i, 1:stop, :] = x_train_i
        y_train_reshaped[i, 1:stop, :] = y_train[i].reshape((y_train[i].shape[0], 1))
        assert y_train_reshaped[i, 0, 0] == BoF_LABEL_ENCODING

    # same again for the validation files
    for i, sample in enumerate(x_val):
        x_val_i = np.array(sample).reshape(sample.shape[:-1])
        stop = 1 + len(x_val_i)
        x_val_reshaped[i, 1:stop, :] = x_val_i
        y_val_reshaped[i, 1:stop, :] = y_val[i].reshape((y_val[i].shape[0], 1))
        assert y_val_reshaped[i, 0, 0] == BoF_LABEL_ENCODING

    if not printing:
        return x_train_reshaped, y_train_reshaped, x_val_reshaped, y_val_reshaped

    print(f'maximum_number_of_windows: {maximum_number_of_windows - 2} + 2 = {maximum_number_of_windows}')
    print(f' number of bins: {number_of_bins}')
    print(f' BoF label: {bof_label}')
    print(f' EoF label: {eof_label}\n')
    print(f' x_train_reshaped[0]: {x_train_reshaped[0].shape}\n{x_train_reshaped[0]}\n')
    print(f' y_train_reshaped[0]: {y_train_reshaped[0].shape}\n{y_train_reshaped[0]}\n\n')
    print_split_data(x_train_reshaped, y_train_reshaped, x_val_reshaped, y_val_reshaped)

    return x_train_reshaped, y_train_reshaped, x_val_reshaped, y_val_reshaped
def flatten_array_of_arrays(array_of_arrays, inserting_file_separators=False, encoding=None, features=True,
                            printing=False, deep_printing=False):
    """Concatenate per-file arrays into one array, file after file, in order.

    Args:
        array_of_arrays: sequence of NumPy arrays, one per file; each is
            iterated along its first axis. A file with three dimensions is
            treated as carrying a trailing channel axis.
        inserting_file_separators: if true, append one separator element after
            each file's elements.
        encoding: passed as ``desired_encoding`` to
            ``encode_ground_truth_array`` when building the ``'EoF'`` separator
            (only used when ``features`` is false).
        features: if true the separator is an array filled with ``-1`` whose
            shape matches one element of the first file; otherwise it is the
            encoded ``'EoF'`` marker.
        printing: if true, print a short summary of the flattened data.
        deep_printing: if true, print a line each time a separator is added.

    Returns:
        ``np.ndarray`` built from all elements (and separators) in their
        original order. An empty ``array_of_arrays`` yields an empty array.
    """
    # Nothing to flatten: return an empty array instead of failing on [0].
    if len(array_of_arrays) == 0:
        return np.array([])

    first_file = array_of_arrays[0]
    channels_present = len(first_file.shape) == 3
    number_of_channels = first_file.shape[2] if channels_present else None

    # Build front-to-back with append/extend (amortised O(1) per element)
    # rather than insert(0, ...) followed by a final reversal.
    flattened_data = []
    for file_data in array_of_arrays:
        flattened_data.extend(file_data)

        if not inserting_file_separators:
            continue

        if features:
            # Width of one element, taken from the array's shape so that a
            # first file holding a single window works too.
            number_of_bins = first_file.shape[1]
            if deep_printing:
                print(number_of_bins)
            if channels_present:
                separator = np.full((number_of_bins, number_of_channels), -1)
            else:
                separator = np.full(number_of_bins, -1)
        else:
            if deep_printing:
                print('EoF')
            separator = encode_ground_truth_array('EoF', current_encoding=None, desired_encoding=encoding)
        flattened_data.append(separator)

    if printing:
        # The previous implementation kept its buffer back-to-front, so the
        # element it reported as "flattened_data[0]" was the most recently
        # added one; the same element is reported here to keep the output.
        newest = flattened_data[-1]
        print(f'array_of_arrays[0].shape: {array_of_arrays[0].shape}')
        print(f' len(flattened_data): {len(flattened_data)}')
        if type(newest) is not str:
            print(f' flattened_data[0]: {newest.shape}\n{newest}\n')
        else:
            print(f' type(flattened_data[0]): {type(newest)}')
            print(f' flattened_data[0]: {newest}\n')

    return np.array(flattened_data)
def test_decode_label(self):
    """Decoding from the 'label' encoding is expected to give back note names."""
    encoded_array = np.array([89, 0, 40, 12, 82, 90])
    decoded_array = encoder.encode_ground_truth_array(encoded_array,
                                                      current_encoding='label',
                                                      desired_encoding=None)

    print(f'\n\nencoded array: {encoded_array.shape}\n{encoded_array}\n')
    print(f'decoded array: {decoded_array.shape}\n{decoded_array}')

    # checked position by position, in order
    expected_names = ('BoF', 'rest', 'C4', 'A-1', 'F#7', 'EoF')
    for position, expected in enumerate(expected_names):
        self.assertEqual(decoded_array[position], expected)
def test_label_encode_note_name(self):
    """Note names (including BoF/EoF/rest) are expected to map to fixed labels."""
    test_array = np.array(['BoF', 'rest', 'C4', 'A-1', 'F#7', 'EoF'])
    encoded_array = encoder.encode_ground_truth_array(test_array,
                                                      current_encoding=None,
                                                      desired_encoding='label')

    print(f'\n\ntest array: {test_array.shape}\n{test_array}\n')
    print(f'encoded array: {encoded_array.shape}\n{encoded_array}')

    # checked position by position, in order
    expected_labels = (89, 0, 40, 12, 82, 90)
    for position, expected in enumerate(expected_labels):
        self.assertEqual(encoded_array[position], expected)
def test_midi_pitch_encode_note_name(self):
    """Note names are expected to map to MIDI pitches / the encoder's marker constants."""
    test_array = np.array(['BoF', 'rest', 'C4', 'A-1', 'F#7', 'EoF'])
    encoded_array = encoder.encode_ground_truth_array(test_array,
                                                      current_encoding=None,
                                                      desired_encoding='midi_pitch')

    print(f'\n\ntest array: {test_array.shape}\n{test_array}\n')
    print(f'encoded array: {encoded_array.shape}\n{encoded_array}')

    # the markers and the rest are compared against the encoder's own constants
    expected_pitches = (
        encoder.BoF_MIDI_ENCODING,
        encoder.REST_MIDI_ENCODING,
        60,
        32,
        102,
        encoder.EoF_MIDI_ENCODING,
    )
    for position, expected in enumerate(expected_pitches):
        self.assertEqual(encoded_array[position], expected)
def test_midi_encode_code_label(self):
    """Converting 'label' values to 'midi_pitch' is expected to give the matching pitches."""
    encoded_array = np.array([89, 0, 40, 12, 82, 90])
    decoded_array = encoder.encode_ground_truth_array(encoded_array,
                                                      current_encoding='label',
                                                      desired_encoding='midi_pitch')

    print(f'\n\nencoded array: {encoded_array.shape}\n{encoded_array}\n')
    print(f'decoded array: {decoded_array.shape}\n{decoded_array}')

    # the markers and the rest are compared against the encoder's own constants
    expected_pitches = (
        encoder.BoF_MIDI_ENCODING,
        encoder.REST_MIDI_ENCODING,
        60,
        32,
        102,
        encoder.EoF_MIDI_ENCODING,
    )
    for position, expected in enumerate(expected_pitches):
        self.assertEqual(decoded_array[position], expected)
def test_one_hot_encode_label(self):
    """One-hot encoding of labels: 89-wide rows with a 1 at each label's index."""
    test_array = np.array([0, 40, 12, 82])
    encoded_array = encoder.encode_ground_truth_array(test_array,
                                                      current_encoding='label',
                                                      desired_encoding='one_hot',
                                                      for_rnn=False)

    print(f'\n\ntest array: {test_array.shape}\n{test_array}\n')
    print(f'encoded array: {encoded_array.shape} of {encoded_array[0].shape}\n{encoded_array}')

    self.assertEqual(len(encoded_array[0]), 89)
    # row k must be hot at the k-th index listed here
    for row, hot_index in enumerate((0, 40, 12, 82)):
        self.assertEqual(encoded_array[row][hot_index], 1)
def test_one_hot_encode_midi_pitch(self):
    """One-hot encoding of MIDI pitches: 89-wide rows with a 1 at the expected index."""
    test_array = np.array([encoder.REST_MIDI_ENCODING, 60, 32, 102])
    encoded_array = encoder.encode_ground_truth_array(test_array,
                                                      current_encoding='midi_pitch',
                                                      desired_encoding='one_hot',
                                                      for_rnn=False)

    print(f'\n\ntest array: {test_array.shape}\n{test_array}\n')
    print(f'encoded array: {encoded_array.shape} of {encoded_array[0].shape}\n{encoded_array}')

    self.assertEqual(len(encoded_array[0]), 89)
    # row k must be hot at the k-th index listed here
    for row, hot_index in enumerate((0, 40, 12, 82)):
        self.assertEqual(encoded_array[row][hot_index], 1)
def test_one_hot_encode_note_name(self):
    """One-hot encoding of note names: 89-wide rows with a 1 at the expected index."""
    test_array = np.array(['rest', 'C4', 'A-1', 'F#7'])
    encoded_array = encoder.encode_ground_truth_array(test_array,
                                                      current_encoding=None,
                                                      desired_encoding='one_hot',
                                                      for_rnn=False)

    print(f'\n\ntest array: {test_array.shape}\n{test_array}\n')
    print(f'encoded array: {encoded_array.shape} of {encoded_array[0].shape}\n{encoded_array}')

    self.assertEqual(len(encoded_array[0]), 89)
    # row k must be hot at the k-th index listed here
    for row, hot_index in enumerate((0, 40, 12, 82)):
        self.assertEqual(encoded_array[row][hot_index], 1)
def test_label_encode_midi_pitch(self):
    """Converting MIDI pitches (and marker constants) to labels gives the expected values."""
    midi_values = [
        encoder.BoF_MIDI_ENCODING,
        encoder.REST_MIDI_ENCODING,
        60,
        32,
        102,
        encoder.EoF_MIDI_ENCODING,
    ]
    test_array = np.array(midi_values)
    encoded_array = encoder.encode_ground_truth_array(test_array,
                                                      current_encoding='midi_pitch',
                                                      desired_encoding='label')

    print(f'\n\ntest array: {test_array.shape}\n{test_array}\n')
    print(f'encoded array: {encoded_array.shape}\n{encoded_array}')

    # checked position by position, in order
    expected_labels = (89, 0, 40, 12, 82, 90)
    for position, expected in enumerate(expected_labels):
        self.assertEqual(encoded_array[position], expected)
def test_decode_midi_pitch(self):
    """Decoding MIDI pitches (and marker constants) is expected to give back note names."""
    midi_values = [
        encoder.BoF_MIDI_ENCODING,
        encoder.REST_MIDI_ENCODING,
        60,
        32,
        102,
        encoder.EoF_MIDI_ENCODING,
    ]
    encoded_array = np.array(midi_values)
    decoded_array = encoder.encode_ground_truth_array(encoded_array,
                                                      current_encoding='midi_pitch',
                                                      desired_encoding=None)

    print(f'\n\nencoded array: {encoded_array.shape}\n{encoded_array}\n')
    print(f'decoded array: {decoded_array.shape}\n{decoded_array}')

    # checked position by position, in order
    expected_names = ('BoF', 'rest', 'C4', 'A-1', 'F#7', 'EoF')
    for position, expected in enumerate(expected_names):
        self.assertEqual(decoded_array[position], expected)
def balance_rests(x, y, sources=None, encoding=None, printing=False, current_encoding=None):
    """Drop surplus 'rest' entries so rests are about as common as an average other class.

    The number of rests kept is the ceiling of the mean of the per-class counts
    of ``y`` with one class left out: the last one (in ``np.unique`` order) when
    ``current_encoding`` is ``None``, the first one otherwise -- presumably the
    position the rest class sorts to in each encoding; TODO confirm.

    Args:
        x: array of samples, filtered along axis 0.
        y: array of targets, same length as ``x``.
        sources: optional array of the same length, filtered alongside x and y.
        encoding: encoding in which the ``'rest'`` value is looked up in ``y``.
        printing: if true, print counts and the before/after arrays.
        current_encoding: only selects which class count is left out of the
            average (see above).

    Returns:
        ``(x_new, y_new)``, or ``(x_new, y_new, sources)`` when ``sources`` was given.

    Note:
        Re-seeds NumPy's global random generator with 42 before shuffling, so
        the selection of rests is reproducible.
    """
    # sanity-test inputs
    if sources is None:
        assert len(x) == len(y)
    else:
        assert len(sources) == len(x) == len(y)

    _, y_counts = np.unique(y, return_counts=True)
    non_rest_counts = y_counts[:-1] if current_encoding is None else y_counts[1:]
    number_of_rests_to_keep = ceil(np.average(non_rest_counts))

    encoded_rest = encode_ground_truth_array(np.array(['rest']), current_encoding=None, desired_encoding=encoding)
    rest_representation = encoded_rest[0]
    rest_indices = np.where(y == rest_representation)[0]

    # fixed seed -> the same rests survive on every run
    np.random.seed(42)
    np.random.shuffle(rest_indices)
    rests_to_keep_indices = rest_indices[:number_of_rests_to_keep]
    rests_to_drop_indices = rest_indices[number_of_rests_to_keep:]

    x_new = np.delete(x, rests_to_drop_indices, axis=0)
    y_new = np.delete(y, rests_to_drop_indices, axis=0)
    if sources is not None:
        sources = np.delete(sources, rests_to_drop_indices, axis=0)

    if printing:
        print(f' number of rests: {rest_indices.size}')
        print(f'number of rests to keep: {number_of_rests_to_keep}')
        print(f' split: {rests_to_keep_indices.size} | {rests_to_drop_indices.size}\n')
        print(f' x: {x.shape}\n{x}\n')
        print(f'x_new: {x_new.shape}\n{x_new}\n')
        print(f' y: {y.shape}\n{y}\n')
        print(f'y_new: {y_new.shape}\n{y_new}\n')

    if sources is None:
        return x_new, y_new
    return x_new, y_new, sources
def make_prediction(encoder_inputs_val, decoder_inputs_val, sample, model_name='rnn_label_freq_50_powers',
                    printing=True, saving=False):
    """Run the saved RNN on one validation sample and report prediction vs. ground truth.

    Args:
        encoder_inputs_val: validation encoder inputs; the second axis length
            minus two is used as the maximum decoding length.
        decoder_inputs_val: validation decoder inputs; row ``sample`` (without
            its first and last entries) is decoded from the ``'label'``
            encoding to obtain the ground truth.
        sample: index of the validation sample to predict.
        model_name: name handed to ``load_rnn_model``.
        printing: if true, print the predicted sequence and the ground truth.
        saving: if true, write a three-line comparison to
            ``txt_files/validation_sample_<sample>.txt``.

    Returns:
        The sequence returned by ``decode_sequence`` for this sample.
    """
    encoder_model, decoder_model = load_rnn_model(model_name)
    file_name = f'validation_sample_{sample}'
    max_length = encoder_inputs_val.shape[1] - 2

    # strip the first and last entries, flatten to 1-D, then decode
    ground_truth = decoder_inputs_val[sample][1:-1]
    ground_truth = ground_truth.reshape(ground_truth.shape[0])
    ground_truth = encode_ground_truth_array(ground_truth, current_encoding='label', desired_encoding=None)

    # keep only what precedes the first 'EoF'; appending is O(1) per item,
    # unlike insert(0, ...) followed by a reversal
    ground_truth_list = []
    for pitch in ground_truth:
        if pitch == 'EoF':
            break
        ground_truth_list.append(pitch)
    ground_truth = np.array(ground_truth_list)

    predicted_sequence = decode_sequence(encoder_inputs_val[sample][1:-1], encoder_model, decoder_model, max_length)

    if printing:
        print(f'predicted sequence: {predicted_sequence.shape}\n{predicted_sequence}\n')
        print(f'ground truth: {ground_truth.shape}\n{ground_truth}\n')

    if saving:
        # the context manager closes the file even if a write fails
        with open(f'txt_files/{file_name}.txt', 'w') as f:
            time_steps = range(max(len(predicted_sequence), len(ground_truth)))
            f.write(' time step: ')
            f.write(''.join(f'{time_step:<5}' for time_step in time_steps))
            f.write('\n')
            # ground_truth was already cut at the first 'EoF' above, so no
            # further 'EoF' check is needed while writing it
            f.write(' ground truth: ')
            f.write(''.join(f'{pitch:<5}' for pitch in ground_truth))
            f.write('\n')
            f.write('model predictions: ')
            f.write(''.join(f'{pitch:<5}' for pitch in predicted_sequence))

    return predicted_sequence
def encode(y, current_encoding=None, target_encoding='label'):
    """Convert ``y`` from ``current_encoding`` to ``target_encoding``.

    Thin convenience wrapper: forwards to ``encode_ground_truth_array`` with
    ``target_encoding`` passed as its ``desired_encoding`` and returns the result.
    """
    return encode_ground_truth_array(y, current_encoding=current_encoding, desired_encoding=target_encoding)