from scipy.io import wavfile
import numpy as np

# Every path used by this script is anchored to the directory holding it.
THIS_DATA_DIR = dirname(realpath(__file__))
DOWNLOADED_ZIP = join(THIS_DATA_DIR, "dataset.zip")
DOWNLOADED_DIR = join(THIS_DATA_DIR, "dataset")
FILE_URL = "http://c4dm.eecs.qmul.ac.uk/rdr/bitstream/handle/123456789/13/Score-informed%20Piano%20Transcription%20Dataset.zip?sequence=1"

if __name__ == '__main__':
    # Download the archive only when there is no local copy yet.
    if not exists(DOWNLOADED_ZIP):
        fetch_command = "wget -O {path} {url}".format(url=FILE_URL, path=DOWNLOADED_ZIP)
        execute_bash(fetch_command)

    # Drop a leftover extraction directory so makedirs() below starts fresh.
    if exists(DOWNLOADED_DIR) and isdir(DOWNLOADED_DIR):
        execute_bash("rm -rf %s" % (DOWNLOADED_DIR))
    # Remove previously generated arrays that sit next to this script.
    # NOTE(review): the original indentation was lost; confirm whether this
    # cleanup belongs inside the branch above or runs unconditionally.
    execute_bash("rm %s " % (join(THIS_DATA_DIR, "*.npy")))

    makedirs(DOWNLOADED_DIR)
    execute_bash("unzip %s -d %s" % (DOWNLOADED_ZIP, DOWNLOADED_DIR))

    for subpath, name in collect_files_with_ext(DOWNLOADED_DIR, ".wav"):
        # Only .wav recordings are converted; anything with "Chromatic" in
        # its name is skipped.
        if not name.endswith(".wav") or "Chromatic" in name:
            continue

        # Store the raw samples; the sampling rate is read but not kept.
        _, music = wavfile.read(subpath)
        audio_target = join(THIS_DATA_DIR, name.replace(".wav", ".npy"))
        np.save(audio_target, music)

        # The matching MIDI file lives beside the recording with a
        # "_correct.mid" suffix; its piano roll is stored as a second array.
        midi_source = str(subpath).replace(".wav", "_correct.mid")
        piece = midiread(midi_source)
        roll_target = join(THIS_DATA_DIR, name.replace(".wav", ".mid.npy"))
        np.save(roll_target, piece.piano_roll)

    # Only the generated .npy files are kept; the download and the
    # extracted tree are removed.
    execute_bash("rm -rf %s" % (DOWNLOADED_DIR))
    execute_bash("rm -rf %s" % (DOWNLOADED_ZIP))
execute_bash('rm -rf %s' % (path,))

if __name__ == '__main__':
    # Begin from a clean state: discard any earlier download and extraction.
    for stale_path in (ZIP_LOCAL, UNZIPPED_LOCAL):
        execute_bash("rm -rf %s" % (stale_path,))

    execute_bash("wget -O {path} {url}".format(url=ZIP_URL, path=ZIP_LOCAL))
    execute_bash("unzip {zipfile} -d {target}".format(zipfile=ZIP_LOCAL, target=UNZIPPED_LOCAL))

    # Test set: the text files present right after unzipping are merged into
    # one file.
    test_input_names = collect_text_files(UNZIPPED_LOCAL)
    transform_files_into_one(test_input_names, TEST_TOKENIZED_FILE)
    if not tokenizer_available:
        execute_bash("rm %s" % (TEST_TOKENIZED_FILE,))
    # The test inputs are deleted before the training archives are unpacked,
    # so the second collect_text_files() call below does not see them.
    # NOTE(review): the original indentation was lost; confirm this call sits
    # outside the tokenizer check above.
    delete_paths([text_path for text_path, _ in test_input_names])

    # Training data ships as .tgz archives inside the unzipped tree; each one
    # is extracted in place.
    for archive_path, _ in collect_files_with_ext(UNZIPPED_LOCAL, ".tgz"):
        execute_bash("tar -xf %s -C %s" % (archive_path, UNZIPPED_LOCAL))

    # Train set: merge whatever text files are now present.
    train_input_names = collect_text_files(UNZIPPED_LOCAL)
    transform_files_into_one(train_input_names, TRAIN_TOKENIZED_FILE)
    if not tokenizer_available:
        execute_bash("rm %s" % (TRAIN_TOKENIZED_FILE,))

    # Finally remove the extracted tree and the downloaded archive.
    for leftover in (UNZIPPED_LOCAL, ZIP_LOCAL):
        execute_bash("rm -rf %s" % (leftover,))
def collect_text_files(path):
    """Return the ``(subpath, name)`` pairs of ".txt" files under *path*.

    Candidates come from ``collect_files_with_ext(path, ".txt")``; a pair is
    kept only when ``is_dataset_input(subpath)`` is truthy.
    """
    selected = []
    for file_path, file_name in collect_files_with_ext(path, ".txt"):
        if is_dataset_input(file_path):
            selected.append((file_path, file_name))
    return selected
from scipy.io import wavfile
import numpy as np

# Every path used by this script is anchored to the directory holding it.
THIS_DATA_DIR = dirname(realpath(__file__))
DOWNLOADED_ZIP = join(THIS_DATA_DIR, "dataset.zip")
DOWNLOADED_DIR = join(THIS_DATA_DIR, "dataset")
FILE_URL = "http://c4dm.eecs.qmul.ac.uk/rdr/bitstream/handle/123456789/13/Score-informed%20Piano%20Transcription%20Dataset.zip?sequence=1"

if __name__ == '__main__':
    # Download the archive only when there is no local copy yet.
    if not exists(DOWNLOADED_ZIP):
        fetch_command = "wget -O {path} {url}".format(url=FILE_URL, path=DOWNLOADED_ZIP)
        execute_bash(fetch_command)

    # Drop a leftover extraction directory so makedirs() below starts fresh.
    if exists(DOWNLOADED_DIR) and isdir(DOWNLOADED_DIR):
        execute_bash("rm -rf %s" % (DOWNLOADED_DIR))
    # Remove previously generated arrays that sit next to this script.
    # NOTE(review): the original indentation was lost; confirm whether this
    # cleanup belongs inside the branch above or runs unconditionally.
    execute_bash("rm %s " % (join(THIS_DATA_DIR, "*.npy")))

    makedirs(DOWNLOADED_DIR)
    execute_bash("unzip %s -d %s" % (DOWNLOADED_ZIP, DOWNLOADED_DIR))

    for subpath, name in collect_files_with_ext(DOWNLOADED_DIR, ".wav"):
        # Only .wav recordings are converted; anything with "Chromatic" in
        # its name is skipped.
        if not name.endswith(".wav") or "Chromatic" in name:
            continue

        # Store the raw samples; the sampling rate is read but not kept.
        _, music = wavfile.read(subpath)
        audio_target = join(THIS_DATA_DIR, name.replace(".wav", ".npy"))
        np.save(audio_target, music)

        # The matching MIDI file lives beside the recording with a
        # "_correct.mid" suffix; its piano roll is stored as a second array.
        midi_source = str(subpath).replace(".wav", "_correct.mid")
        piece = midiread(midi_source)
        roll_target = join(THIS_DATA_DIR, name.replace(".wav", ".mid.npy"))
        np.save(roll_target, piece.piano_roll)

    # Only the generated .npy files are kept; the download and the
    # extracted tree are removed.
    execute_bash("rm -rf %s" % (DOWNLOADED_DIR))
    execute_bash("rm -rf %s" % (DOWNLOADED_ZIP))