import numpy as np
import matplotlib.pyplot as plt

# NOTE: helpers such as split_data_by_phrase, get_dataset, the
# dataframe_to_* processing functions, convert_to_chord_name, and the
# SHORT_PHRASE_LEN / SKIP_REPETITIONS / SKIP_DOUBLE_REPETITIONS /
# TRANSPOSE / SUFFIX constants are assumed to be defined elsewhere
# in this module.


def save_phrase_datasets(process_data_func=dataframe_to_note_set_progression,
                         prefix=""):
    """Split the phrases into train/val/test and write one file per split.

    Only the training split is augmented; validation and test are left
    untransposed so that evaluation reflects the original keys.
    """
    train_phrases, val_phrases, test_phrases = split_data_by_phrase(
        "data/phrases.txt")
    train_dataset = get_dataset("data/all_annotations.csv",
                                train_phrases,
                                process_data_func,
                                augment=True,
                                skip_short_phrases=SHORT_PHRASE_LEN,
                                skip_repetitions=SKIP_REPETITIONS,
                                skip_double_repetitions=SKIP_DOUBLE_REPETITIONS)
    val_dataset = get_dataset("data/all_annotations.csv",
                              val_phrases,
                              process_data_func,
                              augment=False,
                              skip_short_phrases=SHORT_PHRASE_LEN,
                              skip_repetitions=SKIP_REPETITIONS,
                              skip_double_repetitions=SKIP_DOUBLE_REPETITIONS)
    test_dataset = get_dataset("data/all_annotations.csv",
                               test_phrases,
                               process_data_func,
                               augment=False,
                               skip_short_phrases=SHORT_PHRASE_LEN,
                               skip_repetitions=SKIP_REPETITIONS,
                               skip_double_repetitions=SKIP_DOUBLE_REPETITIONS)
    with open("data/train_{}phrases.txt".format(prefix), "w") as f:
        f.write("\n".join(train_dataset))
    with open("data/val_{}phrases.txt".format(prefix), "w") as f:
        f.write("\n".join(val_dataset))
    with open("data/test_{}phrases.txt".format(prefix), "w") as f:
        f.write("\n".join(test_dataset))

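# A hypothetical usage sketch (the "sorted_" prefix is illustrative, not a
# name used elsewhere in this module): the sorted note-set processor used by
# histogram_chords below can be reused to write an order-normalized variant
# of the dataset alongside the default one.
#
#     save_phrase_datasets()  # default note-set progressions
#     save_phrase_datasets(dataframe_to_note_set_progression_sorted,
#                          prefix="sorted_")
#     # -> data/train_sorted_phrases.txt, data/val_sorted_phrases.txt, ...
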
def convert_phrases_to_root_progression():
    """Convert every phrase to a root progression and write it to disk."""
    phrase_list, _, _ = split_data_by_phrase("data/phrases.txt",
                                             split_ratio=[1, 0, 0],
                                             shuffle=False)
    dataset = get_dataset("data/all_annotations.csv",
                          phrase_list,
                          dataframe_to_root_progression,
                          skip_short_phrases=SHORT_PHRASE_LEN,
                          skip_repetitions=SKIP_REPETITIONS,
                          skip_double_repetitions=SKIP_DOUBLE_REPETITIONS)
    with open("data/phrases_root.txt", "w") as f:
        f.write("\n".join(dataset))

def histogram_chords(chord_output, is_sorted=True):
    """Count chord-to-chord transitions over all phrases.

    Returns an (n, n + 1) table whose rows are start chords and whose
    columns are end chords; the extra last column counts phrase endings.
    """
    if is_sorted:
        process_data_func = dataframe_to_note_set_progression_sorted
    else:
        process_data_func = dataframe_to_note_set_progression
    phrase_list, _, _ = split_data_by_phrase("data/phrases.txt",
                                             split_ratio=[1, 0, 0],
                                             shuffle=False)
    dataset = get_dataset("data/all_annotations.csv",
                          phrase_list,
                          process_data_func,
                          skip_short_phrases=SHORT_PHRASE_LEN,
                          skip_repetitions=SKIP_REPETITIONS,
                          augment=True,
                          augment_func=transpose_phrase_to_c_maj_or_a_min)

    # Collect the chord vocabulary and the per-phrase progressions.
    unique_chords = set()
    progression_list = []
    for phrase in dataset:
        progression = phrase.split(" ")
        progression = convert_to_chord_name(progression,
                                            output=chord_output,
                                            is_sorted=is_sorted)
        progression = progression.split(" ")
        unique_chords.update(progression)
        progression_list.append(progression)
    unique_chords = sorted(unique_chords)

    # Transition counts; the extra final column marks "end of phrase".
    n_rows = len(unique_chords)
    n_cols = n_rows + 1
    table = np.zeros((n_rows, n_cols))
    for progression in progression_list:
        for i in range(len(progression) - 1):
            start = unique_chords.index(progression[i])
            end = unique_chords.index(progression[i + 1])
            table[start][end] += 1
        last = unique_chords.index(progression[-1])
        table[last][n_cols - 1] += 1
    return table, unique_chords

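# A minimal sketch (not part of the original module) showing one way to turn
# the raw transition counts into row-normalized probabilities and render them
# as a heatmap. The default chord_output value and the output path are
# assumptions for illustration; pass whatever convert_to_chord_name expects.
def plot_transition_heatmap(chord_output="name", is_sorted=True):
    table, unique_chords = histogram_chords(chord_output, is_sorted=is_sorted)
    # Normalize each row so it sums to 1 (all-zero rows stay zero).
    row_sums = table.sum(axis=1, keepdims=True)
    probs = np.divide(table, row_sums, out=np.zeros_like(table),
                      where=row_sums > 0)
    n = len(unique_chords)
    plt.figure(figsize=(20, 20))
    plt.imshow(probs, aspect="auto", cmap="viridis")
    plt.colorbar(label="transition probability")
    plt.xticks(np.arange(n + 1), unique_chords + ["<end>"], rotation=90,
               fontsize=6)
    plt.yticks(np.arange(n), unique_chords, fontsize=6)
    plt.savefig("figures/transition_heatmap.pdf")  # illustrative path
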
def print_histogram_root(name="root_histogram"):
    """Plot a bar chart of root frequencies over the whole corpus."""
    if TRANSPOSE:
        augment_func = transpose_phrase_to_c_maj_or_a_min
    else:
        augment_func = None
    phrase_list, _, _ = split_data_by_phrase("data/phrases.txt",
                                             split_ratio=[1, 0, 0],
                                             shuffle=False)
    dataset = get_dataset("data/all_annotations.csv",
                          phrase_list,
                          dataframe_to_root_progression,
                          skip_short_phrases=SHORT_PHRASE_LEN,
                          skip_repetitions=SKIP_REPETITIONS,
                          augment=TRANSPOSE,
                          augment_func=augment_func)

    unique_chords = set()
    progression_list = []
    for phrase in dataset:
        progression = phrase.split(" ")
        unique_chords.update(progression)
        progression_list.append(progression)
    unique_chords = sorted(unique_chords)

    # Count how often each root occurs anywhere in a progression.
    n_rows = len(unique_chords)
    counter = np.zeros((n_rows, ))
    for progression in progression_list:
        for root in progression:
            counter[unique_chords.index(root)] += 1

    plt.rcParams['figure.figsize'] = (20, 20)
    plt.bar(np.arange(n_rows), counter)
    print(n_rows)  # number of distinct roots
    plt.xticks(np.arange(n_rows), unique_chords, rotation=0, fontsize=10)
    # The total event count is embedded in the file name.
    plt.savefig("figures/{}{} ({:d}).pdf".format(name, SUFFIX,
                                                 int(np.sum(counter))))

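# A small companion sketch (an assumed helper, not in the original module)
# that reuses the same pipeline to print the most frequent roots as text,
# which can be easier to read than a dense bar chart.
def print_top_roots(n=10):
    phrase_list, _, _ = split_data_by_phrase("data/phrases.txt",
                                             split_ratio=[1, 0, 0],
                                             shuffle=False)
    dataset = get_dataset("data/all_annotations.csv",
                          phrase_list,
                          dataframe_to_root_progression,
                          skip_short_phrases=SHORT_PHRASE_LEN,
                          skip_repetitions=SKIP_REPETITIONS)
    counts = {}
    for phrase in dataset:
        for root in phrase.split(" "):
            counts[root] = counts.get(root, 0) + 1
    # Sort by descending count and print the top n roots.
    for root, count in sorted(counts.items(), key=lambda kv: -kv[1])[:n]:
        print("{}\t{}".format(root, count))
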
def convert_phrases():
    """Write note-set progressions and their chord-name equivalents."""
    phrase_list, _, _ = split_data_by_phrase("data/phrases.txt",
                                             split_ratio=[1, 0, 0],
                                             shuffle=False)
    dataset = get_dataset("data/all_annotations.csv",
                          phrase_list,
                          dataframe_to_note_set_progression,
                          skip_short_phrases=SHORT_PHRASE_LEN,
                          skip_repetitions=SKIP_REPETITIONS,
                          skip_double_repetitions=SKIP_DOUBLE_REPETITIONS)
    with open("data/phrases_note_set.txt", "w") as f:
        f.write("\n".join(dataset))

    # Also dump a human-readable version using chord names.
    progression_list = []
    for phrase in dataset:
        progression = phrase.split(" ")
        progression = convert_to_chord_name(progression)
        progression_list.append(progression)
    with open("data/phrases_name.txt", "w") as f:
        f.write("\n".join(progression_list))

def save_transposed_datasets():
    """Like save_phrase_datasets, but every split is transposed to
    C major / A minor.

    Here the augmentation acts as key normalization rather than data
    augmentation, so it is applied to validation and test as well.
    """
    train_phrases, val_phrases, test_phrases = split_data_by_phrase(
        "data/phrases.txt")
    process_data_func = dataframe_to_note_set_progression
    augment_func = transpose_phrase_to_c_maj_or_a_min
    train_dataset = get_dataset("data/all_annotations.csv",
                                train_phrases,
                                process_data_func,
                                augment=True,
                                skip_short_phrases=SHORT_PHRASE_LEN,
                                skip_repetitions=SKIP_REPETITIONS,
                                skip_double_repetitions=SKIP_DOUBLE_REPETITIONS,
                                augment_func=augment_func)
    val_dataset = get_dataset("data/all_annotations.csv",
                              val_phrases,
                              process_data_func,
                              augment=True,
                              skip_short_phrases=SHORT_PHRASE_LEN,
                              skip_repetitions=SKIP_REPETITIONS,
                              skip_double_repetitions=SKIP_DOUBLE_REPETITIONS,
                              augment_func=augment_func)
    test_dataset = get_dataset("data/all_annotations.csv",
                               test_phrases,
                               process_data_func,
                               augment=True,
                               skip_short_phrases=SHORT_PHRASE_LEN,
                               skip_repetitions=SKIP_REPETITIONS,
                               skip_double_repetitions=SKIP_DOUBLE_REPETITIONS,
                               augment_func=augment_func)
    with open("data/train_transposed_phrases.txt", "w") as f:
        f.write("\n".join(train_dataset))
    with open("data/val_transposed_phrases.txt", "w") as f:
        f.write("\n".join(val_dataset))
    with open("data/test_transposed_phrases.txt", "w") as f:
        f.write("\n".join(test_dataset))

def histogram_transposed_local_key_root(
        process_data_func=dataframe_to_root_progression):
    """Build the same transition table as histogram_chords over root
    progressions, additionally returning the per-phrase modulations."""
    phrase_list, _, _ = split_data_by_phrase("data/phrases.txt",
                                             split_ratio=[1, 0, 0],
                                             shuffle=False)
    dataset, modulation_list = get_dataset_and_modulation(
        "data/all_annotations.csv",
        phrase_list,
        process_data_func,
        skip_short_phrases=SHORT_PHRASE_LEN,
        skip_repetitions=SKIP_REPETITIONS)

    unique_chords = set()
    progression_list = []
    for phrase in dataset:
        progression = phrase.split(" ")
        unique_chords.update(progression)
        progression_list.append(progression)
    unique_chords = sorted(unique_chords)

    # Transition counts; the extra final column marks "end of phrase".
    n_rows = len(unique_chords)
    n_cols = n_rows + 1
    table = np.zeros((n_rows, n_cols))
    for progression in progression_list:
        for i in range(len(progression) - 1):
            start = unique_chords.index(progression[i])
            end = unique_chords.index(progression[i + 1])
            table[start][end] += 1
        last = unique_chords.index(progression[-1])
        table[last][n_cols - 1] += 1
    return table, unique_chords, modulation_list

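# A minimal export sketch (the helper name and output path are assumptions):
# write the transition counts to CSV with chord labels so the table can be
# inspected in a spreadsheet. Uses only the values returned above.
import csv


def export_transition_table_csv(path="data/root_transitions.csv"):
    table, unique_chords, _ = histogram_transposed_local_key_root()
    header = [""] + unique_chords + ["<end>"]
    with open(path, "w", newline="") as f:
        writer = csv.writer(f)
        writer.writerow(header)
        # One row per start chord, labeled in the first column.
        for label, row in zip(unique_chords, table):
            writer.writerow([label] + [int(x) for x in row])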