Example #1
0
from scipy.io import wavfile
import numpy as np

# Directory of this file, as given by dirname(realpath(__file__)); every
# other path in this script is built relative to it.
THIS_DATA_DIR = dirname(realpath(__file__))
# Local file the dataset archive is downloaded to.
DOWNLOADED_ZIP = join(THIS_DATA_DIR, "dataset.zip")
# Scratch directory the archive is unzipped into.
DOWNLOADED_DIR = join(THIS_DATA_DIR, "dataset")
# Remote location of the dataset archive (fetched with wget in the
# __main__ section).
FILE_URL = "http://c4dm.eecs.qmul.ac.uk/rdr/bitstream/handle/123456789/13/Score-informed%20Piano%20Transcription%20Dataset.zip?sequence=1"

if __name__ == '__main__':
    # Converts the downloaded dataset into .npy files stored next to this
    # script: fetch and unzip the archive, save every selected recording
    # together with the piano roll of its "_correct.mid" companion, then
    # remove the archive and the unzipped directory.
    #
    # NOTE(review): the commands assume execute_bash runs its argument
    # through a shell (the "*.npy" glob already depends on that) -- confirm
    # against its definition.

    # An archive left over from a previous run is reused. The URL is quoted
    # because it contains "?", which an unquoted shell word treats as a
    # glob character.
    if not exists(DOWNLOADED_ZIP):
        execute_bash('wget -O "{path}" "{url}"'.format(url=FILE_URL,
                                                       path=DOWNLOADED_ZIP))
    if exists(DOWNLOADED_DIR) and isdir(DOWNLOADED_DIR):
        execute_bash('rm -rf "%s"' % (DOWNLOADED_DIR,))
    # -f: on a first run there are no .npy files yet and rm must not fail.
    # Only the directory part is quoted so the shell still expands the glob.
    execute_bash('rm -f "%s"/*.npy' % (THIS_DATA_DIR,))
    makedirs(DOWNLOADED_DIR)
    execute_bash('unzip "%s" -d "%s"' % (DOWNLOADED_ZIP, DOWNLOADED_DIR))

    files = collect_files_with_ext(DOWNLOADED_DIR, ".wav")

    for subpath, name in files:
        # Files whose name contains "Chromatic" are skipped.
        if name.endswith(".wav") and "Chromatic" not in name:
            # wavfile.read returns (sample rate, samples); only the samples
            # are stored.
            _sampling_rate, music = wavfile.read(subpath)
            np.save(join(THIS_DATA_DIR, name.replace(".wav", ".npy")), music)
            # The matching MIDI file sits next to the recording with a
            # "_correct.mid" suffix in place of ".wav".
            piece = midiread(str(subpath).replace(".wav", "_correct.mid"))
            np.save(join(THIS_DATA_DIR, name.replace(".wav", ".mid.npy")),
                    piece.piano_roll)

    # Drop the intermediate files; only the .npy outputs remain.
    execute_bash('rm -rf "%s"' % (DOWNLOADED_DIR,))
    execute_bash('rm -rf "%s"' % (DOWNLOADED_ZIP,))
Example #2
0
        execute_bash('rm -rf %s' % (path,))

if __name__ == '__main__':
    # Start from a clean slate, then download and unpack the archive.
    for stale_path in (ZIP_LOCAL, UNZIPPED_LOCAL):
        execute_bash("rm -rf %s" % (stale_path,))
    download_command = "wget -O {path} {url}".format(url=ZIP_URL, path=ZIP_LOCAL)
    execute_bash(download_command)
    unzip_command = "unzip {zipfile} -d {target}".format(zipfile=ZIP_LOCAL, target=UNZIPPED_LOCAL)
    execute_bash(unzip_command)

    # Test set: the text files available right after unzipping are merged
    # into one file; the merged file is removed again when no tokenizer is
    # available.
    test_input_names = collect_text_files(UNZIPPED_LOCAL)
    transform_files_into_one(test_input_names, TEST_TOKENIZED_FILE)
    if not tokenizer_available:
        execute_bash("rm %s" % (TEST_TOKENIZED_FILE,))
    # Remove the test inputs so the second collection pass below does not
    # pick them up again.
    delete_paths([file_path for file_path, _ in test_input_names])

    # Train files ship as .tgz archives; extract each of them in place.
    tar_files = collect_files_with_ext(UNZIPPED_LOCAL, ".tgz")
    for archive_path, _archive_name in tar_files:
        execute_bash("tar -xf %s -C %s" % (archive_path, UNZIPPED_LOCAL))

    # Train set: same merge step over the freshly extracted text files.
    train_input_names = collect_text_files(UNZIPPED_LOCAL)
    transform_files_into_one(train_input_names, TRAIN_TOKENIZED_FILE)
    if not tokenizer_available:
        execute_bash("rm %s" % (TRAIN_TOKENIZED_FILE,))

    # Finally remove the unpacked tree and the downloaded archive.
    for leftover in (UNZIPPED_LOCAL, ZIP_LOCAL):
        execute_bash("rm -rf %s" % leftover)
Example #3
0
def collect_text_files(path):
    """Return the ``.txt`` entries for *path* that are dataset inputs.

    Iterates over the ``(subpath, name)`` pairs produced by
    ``collect_files_with_ext(path, ".txt")`` and keeps, in order, those
    whose ``subpath`` is accepted by ``is_dataset_input``.
    """
    selected = []
    for subpath, name in collect_files_with_ext(path, ".txt"):
        if is_dataset_input(subpath):
            selected.append((subpath, name))
    return selected
Example #4
0
from scipy.io import wavfile
import numpy as np

# Directory of this file; every other path here is built relative to it.
THIS_DATA_DIR = dirname(realpath(__file__))
# Local file the dataset archive is downloaded to.
DOWNLOADED_ZIP = join(THIS_DATA_DIR, "dataset.zip")
# Scratch directory the archive is unzipped into.
DOWNLOADED_DIR = join(THIS_DATA_DIR, "dataset")
# Remote location of the dataset archive.
FILE_URL = "http://c4dm.eecs.qmul.ac.uk/rdr/bitstream/handle/123456789/13/Score-informed%20Piano%20Transcription%20Dataset.zip?sequence=1"

if __name__ == '__main__':
    # Builds .npy files next to this script from the downloaded dataset:
    # fetch and unzip the archive, store each selected recording and the
    # piano roll read from its "_correct.mid" companion, then delete the
    # archive and the unzipped directory.
    #
    # NOTE(review): assumes execute_bash hands its argument to a shell (the
    # "*.npy" glob only works that way) -- confirm against its definition.

    # Skip the download when the archive is already present. The URL is
    # quoted because "?" is a glob character in an unquoted shell word.
    if not exists(DOWNLOADED_ZIP):
        execute_bash('wget -O "{path}" "{url}"'.format(url=FILE_URL, path=DOWNLOADED_ZIP))
    if exists(DOWNLOADED_DIR) and isdir(DOWNLOADED_DIR):
        execute_bash('rm -rf "%s"' % (DOWNLOADED_DIR,))
    # -f keeps rm from failing when no .npy files exist yet (first run).
    # The glob stays outside the quotes so the shell can expand it.
    execute_bash('rm -f "%s"/*.npy' % (THIS_DATA_DIR,))
    makedirs(DOWNLOADED_DIR)
    execute_bash('unzip "%s" -d "%s"' % (DOWNLOADED_ZIP, DOWNLOADED_DIR))

    files = collect_files_with_ext(DOWNLOADED_DIR, ".wav")

    for subpath, name in files:
        # Entries whose name contains "Chromatic" are left out.
        if name.endswith(".wav") and "Chromatic" not in name:
            # wavfile.read returns (sample rate, samples); the rate is not
            # needed here.
            _sampling_rate, music = wavfile.read(subpath)
            np.save(join(THIS_DATA_DIR, name.replace(".wav", ".npy")), music)
            # The MIDI counterpart shares the path, with "_correct.mid"
            # replacing ".wav".
            piece = midiread(str(subpath).replace(".wav", "_correct.mid"))
            np.save(join(THIS_DATA_DIR, name.replace(".wav", ".mid.npy")), piece.piano_roll)

    # Clean up the intermediate files; the .npy outputs are kept.
    execute_bash('rm -rf "%s"' % (DOWNLOADED_DIR,))
    execute_bash('rm -rf "%s"' % (DOWNLOADED_ZIP,))