import numpy as np
import matplotlib.pyplot as plt

# NOTE: helpers such as split_data_by_phrase, get_dataset, the
# dataframe_to_* processing functions, convert_to_chord_name, and the
# SHORT_PHRASE_LEN / SKIP_REPETITIONS / SKIP_DOUBLE_REPETITIONS /
# TRANSPOSE / SUFFIX constants are assumed to be defined elsewhere
# in this module.


def save_phrase_datasets(process_data_func=dataframe_to_note_set_progression,
                         prefix=""):
    """Split the phrases into train/val/test and write one file per split.

    Only the training split is augmented; validation and test are left
    untransposed so that evaluation reflects the original keys.
    """
    train_phrases, val_phrases, test_phrases = split_data_by_phrase(
        "data/phrases.txt")
    train_dataset = get_dataset("data/all_annotations.csv",
                                train_phrases,
                                process_data_func,
                                augment=True,
                                skip_short_phrases=SHORT_PHRASE_LEN,
                                skip_repetitions=SKIP_REPETITIONS,
                                skip_double_repetitions=SKIP_DOUBLE_REPETITIONS)
    val_dataset = get_dataset("data/all_annotations.csv",
                              val_phrases,
                              process_data_func,
                              augment=False,
                              skip_short_phrases=SHORT_PHRASE_LEN,
                              skip_repetitions=SKIP_REPETITIONS,
                              skip_double_repetitions=SKIP_DOUBLE_REPETITIONS)
    test_dataset = get_dataset("data/all_annotations.csv",
                               test_phrases,
                               process_data_func,
                               augment=False,
                               skip_short_phrases=SHORT_PHRASE_LEN,
                               skip_repetitions=SKIP_REPETITIONS,
                               skip_double_repetitions=SKIP_DOUBLE_REPETITIONS)
    with open("data/train_{}phrases.txt".format(prefix), "w") as f:
        f.write("\n".join(train_dataset))
    with open("data/val_{}phrases.txt".format(prefix), "w") as f:
        f.write("\n".join(val_dataset))
    with open("data/test_{}phrases.txt".format(prefix), "w") as f:
        f.write("\n".join(test_dataset))

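# A hypothetical usage sketch (the "sorted_" prefix is illustrative, not a
# name used elsewhere in this module): the sorted note-set processor used by
# histogram_chords below can be reused to write an order-normalized variant
# of the dataset alongside the default one.
#
#     save_phrase_datasets()  # default note-set progressions
#     save_phrase_datasets(dataframe_to_note_set_progression_sorted,
#                          prefix="sorted_")
#     # -> data/train_sorted_phrases.txt, data/val_sorted_phrases.txt, ...
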
def convert_phrases_to_root_progression():
    """Convert every phrase to a root progression and write it to disk."""
    phrase_list, _, _ = split_data_by_phrase("data/phrases.txt",
                                             split_ratio=[1, 0, 0],
                                             shuffle=False)
    dataset = get_dataset("data/all_annotations.csv",
                          phrase_list,
                          dataframe_to_root_progression,
                          skip_short_phrases=SHORT_PHRASE_LEN,
                          skip_repetitions=SKIP_REPETITIONS,
                          skip_double_repetitions=SKIP_DOUBLE_REPETITIONS)
    with open("data/phrases_root.txt", "w") as f:
        f.write("\n".join(dataset))

def histogram_chords(chord_output, is_sorted=True):
    """Count chord-to-chord transitions over all phrases.

    Returns an (n, n + 1) table whose rows are start chords and whose
    columns are end chords; the extra last column counts phrase endings.
    """
    if is_sorted:
        process_data_func = dataframe_to_note_set_progression_sorted
    else:
        process_data_func = dataframe_to_note_set_progression
    phrase_list, _, _ = split_data_by_phrase("data/phrases.txt",
                                             split_ratio=[1, 0, 0],
                                             shuffle=False)
    dataset = get_dataset("data/all_annotations.csv",
                          phrase_list,
                          process_data_func,
                          skip_short_phrases=SHORT_PHRASE_LEN,
                          skip_repetitions=SKIP_REPETITIONS,
                          augment=True,
                          augment_func=transpose_phrase_to_c_maj_or_a_min)

    # Collect the chord vocabulary and the per-phrase progressions.
    unique_chords = set()
    progression_list = []
    for phrase in dataset:
        progression = phrase.split(" ")
        progression = convert_to_chord_name(progression,
                                            output=chord_output,
                                            is_sorted=is_sorted)
        progression = progression.split(" ")
        unique_chords.update(progression)
        progression_list.append(progression)
    unique_chords = sorted(unique_chords)

    # Transition counts; the extra final column marks "end of phrase".
    n_rows = len(unique_chords)
    n_cols = n_rows + 1
    table = np.zeros((n_rows, n_cols))
    for progression in progression_list:
        for i in range(len(progression) - 1):
            start = unique_chords.index(progression[i])
            end = unique_chords.index(progression[i + 1])
            table[start][end] += 1
        last = unique_chords.index(progression[-1])
        table[last][n_cols - 1] += 1
    return table, unique_chords

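# A minimal sketch (not part of the original module) showing one way to turn
# the raw transition counts into row-normalized probabilities and render them
# as a heatmap. The default chord_output value and the output path are
# assumptions for illustration; pass whatever convert_to_chord_name expects.
def plot_transition_heatmap(chord_output="name", is_sorted=True):
    table, unique_chords = histogram_chords(chord_output, is_sorted=is_sorted)
    # Normalize each row so it sums to 1 (all-zero rows stay zero).
    row_sums = table.sum(axis=1, keepdims=True)
    probs = np.divide(table, row_sums, out=np.zeros_like(table),
                      where=row_sums > 0)
    n = len(unique_chords)
    plt.figure(figsize=(20, 20))
    plt.imshow(probs, aspect="auto", cmap="viridis")
    plt.colorbar(label="transition probability")
    plt.xticks(np.arange(n + 1), unique_chords + ["<end>"], rotation=90,
               fontsize=6)
    plt.yticks(np.arange(n), unique_chords, fontsize=6)
    plt.savefig("figures/transition_heatmap.pdf")  # illustrative path
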
def print_histogram_root(name="root_histogram"):
    """Plot a bar chart of root frequencies over the whole corpus."""
    if TRANSPOSE:
        augment_func = transpose_phrase_to_c_maj_or_a_min
    else:
        augment_func = None
    phrase_list, _, _ = split_data_by_phrase("data/phrases.txt",
                                             split_ratio=[1, 0, 0],
                                             shuffle=False)
    dataset = get_dataset("data/all_annotations.csv",
                          phrase_list,
                          dataframe_to_root_progression,
                          skip_short_phrases=SHORT_PHRASE_LEN,
                          skip_repetitions=SKIP_REPETITIONS,
                          augment=TRANSPOSE,
                          augment_func=augment_func)

    unique_chords = set()
    progression_list = []
    for phrase in dataset:
        progression = phrase.split(" ")
        unique_chords.update(progression)
        progression_list.append(progression)
    unique_chords = sorted(unique_chords)

    # Count how often each root occurs anywhere in a progression.
    n_rows = len(unique_chords)
    counter = np.zeros((n_rows, ))
    for progression in progression_list:
        for root in progression:
            counter[unique_chords.index(root)] += 1

    plt.rcParams['figure.figsize'] = (20, 20)
    plt.bar(np.arange(n_rows), counter)
    print(n_rows)  # number of distinct roots
    plt.xticks(np.arange(n_rows), unique_chords, rotation=0, fontsize=10)
    # The total event count is embedded in the file name.
    plt.savefig("figures/{}{} ({:d}).pdf".format(name, SUFFIX,
                                                 int(np.sum(counter))))

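# A small companion sketch (an assumed helper, not in the original module)
# that reuses the same pipeline to print the most frequent roots as text,
# which can be easier to read than a dense bar chart.
def print_top_roots(n=10):
    phrase_list, _, _ = split_data_by_phrase("data/phrases.txt",
                                             split_ratio=[1, 0, 0],
                                             shuffle=False)
    dataset = get_dataset("data/all_annotations.csv",
                          phrase_list,
                          dataframe_to_root_progression,
                          skip_short_phrases=SHORT_PHRASE_LEN,
                          skip_repetitions=SKIP_REPETITIONS)
    counts = {}
    for phrase in dataset:
        for root in phrase.split(" "):
            counts[root] = counts.get(root, 0) + 1
    # Sort by descending count and print the top n roots.
    for root, count in sorted(counts.items(), key=lambda kv: -kv[1])[:n]:
        print("{}\t{}".format(root, count))
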
def convert_phrases():
    """Write note-set progressions and their chord-name equivalents."""
    phrase_list, _, _ = split_data_by_phrase("data/phrases.txt",
                                             split_ratio=[1, 0, 0],
                                             shuffle=False)
    dataset = get_dataset("data/all_annotations.csv",
                          phrase_list,
                          dataframe_to_note_set_progression,
                          skip_short_phrases=SHORT_PHRASE_LEN,
                          skip_repetitions=SKIP_REPETITIONS,
                          skip_double_repetitions=SKIP_DOUBLE_REPETITIONS)
    with open("data/phrases_note_set.txt", "w") as f:
        f.write("\n".join(dataset))

    # Also dump a human-readable version using chord names.
    progression_list = []
    for phrase in dataset:
        progression = phrase.split(" ")
        progression = convert_to_chord_name(progression)
        progression_list.append(progression)
    with open("data/phrases_name.txt", "w") as f:
        f.write("\n".join(progression_list))

def save_transposed_datasets():
    """Like save_phrase_datasets, but every split is transposed to
    C major / A minor.

    Here the augmentation acts as key normalization rather than data
    augmentation, so it is applied to validation and test as well.
    """
    train_phrases, val_phrases, test_phrases = split_data_by_phrase(
        "data/phrases.txt")
    process_data_func = dataframe_to_note_set_progression
    augment_func = transpose_phrase_to_c_maj_or_a_min
    train_dataset = get_dataset("data/all_annotations.csv",
                                train_phrases,
                                process_data_func,
                                augment=True,
                                skip_short_phrases=SHORT_PHRASE_LEN,
                                skip_repetitions=SKIP_REPETITIONS,
                                skip_double_repetitions=SKIP_DOUBLE_REPETITIONS,
                                augment_func=augment_func)
    val_dataset = get_dataset("data/all_annotations.csv",
                              val_phrases,
                              process_data_func,
                              augment=True,
                              skip_short_phrases=SHORT_PHRASE_LEN,
                              skip_repetitions=SKIP_REPETITIONS,
                              skip_double_repetitions=SKIP_DOUBLE_REPETITIONS,
                              augment_func=augment_func)
    test_dataset = get_dataset("data/all_annotations.csv",
                               test_phrases,
                               process_data_func,
                               augment=True,
                               skip_short_phrases=SHORT_PHRASE_LEN,
                               skip_repetitions=SKIP_REPETITIONS,
                               skip_double_repetitions=SKIP_DOUBLE_REPETITIONS,
                               augment_func=augment_func)
    with open("data/train_transposed_phrases.txt", "w") as f:
        f.write("\n".join(train_dataset))
    with open("data/val_transposed_phrases.txt", "w") as f:
        f.write("\n".join(val_dataset))
    with open("data/test_transposed_phrases.txt", "w") as f:
        f.write("\n".join(test_dataset))

def histogram_transposed_local_key_root(
        process_data_func=dataframe_to_root_progression):
    """Build the same transition table as histogram_chords over root
    progressions, additionally returning the per-phrase modulations."""
    phrase_list, _, _ = split_data_by_phrase("data/phrases.txt",
                                             split_ratio=[1, 0, 0],
                                             shuffle=False)
    dataset, modulation_list = get_dataset_and_modulation(
        "data/all_annotations.csv",
        phrase_list,
        process_data_func,
        skip_short_phrases=SHORT_PHRASE_LEN,
        skip_repetitions=SKIP_REPETITIONS)

    unique_chords = set()
    progression_list = []
    for phrase in dataset:
        progression = phrase.split(" ")
        unique_chords.update(progression)
        progression_list.append(progression)
    unique_chords = sorted(unique_chords)

    # Transition counts; the extra final column marks "end of phrase".
    n_rows = len(unique_chords)
    n_cols = n_rows + 1
    table = np.zeros((n_rows, n_cols))
    for progression in progression_list:
        for i in range(len(progression) - 1):
            start = unique_chords.index(progression[i])
            end = unique_chords.index(progression[i + 1])
            table[start][end] += 1
        last = unique_chords.index(progression[-1])
        table[last][n_cols - 1] += 1
    return table, unique_chords, modulation_list

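# A minimal export sketch (the helper name and output path are assumptions):
# write the transition counts to CSV with chord labels so the table can be
# inspected in a spreadsheet. Uses only the values returned above.
import csv


def export_transition_table_csv(path="data/root_transitions.csv"):
    table, unique_chords, _ = histogram_transposed_local_key_root()
    header = [""] + unique_chords + ["<end>"]
    with open(path, "w", newline="") as f:
        writer = csv.writer(f)
        writer.writerow(header)
        # One row per start chord, labeled in the first column.
        for label, row in zip(unique_chords, table):
            writer.writerow([label] + [int(x) for x in row])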